Merge remote-tracking branch 'origin/master' into fix-slowdown-count-side-effect

This commit is contained in:
Igor Nikonov 2024-02-20 11:29:04 +00:00
commit f40321f8a6
19 changed files with 549 additions and 278 deletions

View File

@ -10,11 +10,62 @@ The ClickHouse server can be configured with configuration files in XML or YAML
It is possible to mix XML and YAML configuration files, for example you could have a main configuration file `config.xml` and additional configuration files `config.d/network.xml`, `config.d/timezone.yaml` and `config.d/keeper.yaml`. Mixing XML and YAML within a single configuration file is not supported. XML configuration files should use `<clickhouse>...</clickhouse>` as top-level tag. In YAML configuration files, `clickhouse:` is optional, the parser inserts it implicitly if absent.
## Overriding Configuration {#override}
## Merging Configuration {#merging}
The merge of configuration files behaves as one intuitively expects: The contents of both files are combined recursively, children with the same name are replaced by the element of the more specific configuration file. The merge can be customized using attributes `replace` and `remove`.
- Attribute `replace` means that the element is replaced by the specified one.
- Attribute `remove` means that the element is deleted.
Two configuration files (usually the main configuration file and another configuration file from `config.d/`) are merged as follows:
- If a node (i.e. a path leading to an element) appears in both files and does not have attributes `replace` or `remove`, it is included in the merged configuration file and children from both nodes are included and merged recursively.
- If one of the two nodes contains attribute `replace`, it is included in the merged configuration file but only children from the node with attribute `replace` are included.
- If one of the two nodes contains attribute `remove`, the node is not included in the merged configuration file (if it exists already, it is deleted).
Example:
```xml
<!-- config.xml -->
<clickhouse>
<config_a>
<setting_1>1</setting_1>
</config_a>
<config_b>
<setting_2>2</setting_2>
</config_b>
<config_c>
<setting_3>3</setting_3>
</config_c>
</clickhouse>
```
and
```xml
<!-- config.d/other_config.xml -->
<clickhouse>
<config_a>
<setting_4>4</setting_4>
</config_a>
<config_b replace="replace">
<setting_5>5</setting_5>
</config_b>
<config_c remove="remove">
<setting_6>6</setting_6>
</config_c>
</clickhouse>
```
generates merged configuration file:
```xml
<clickhouse>
<config_a>
<setting_1>1</setting_1>
<setting_4>4</setting_4>
</config_a>
<config_b>
<setting_5>5</setting_5>
</config_b>
</clickhouse>
```
To specify that a value of an element should be replaced by the value of an environment variable, you can use attribute `from_env`.
@ -125,7 +176,7 @@ Users configuration can be split into separate files similar to `config.xml` and
Directory name is defined as `users_config` setting without `.xml` postfix concatenated with `.d`.
Directory `users.d` is used by default, as `users_config` defaults to `users.xml`.
Note that configuration files are first merged taking into account [Override](#override) settings and includes are processed after that.
Note that configuration files are first [merged](#merging) taking into account settings, and includes are processed after that.
## XML example {#example}

View File

@ -509,7 +509,7 @@ Result:
## cosineDistance
Calculates the cosine distance between two vectors (the values of the tuples are the coordinates). The less the returned value is, the more similar are the vectors.
Calculates the cosine distance between two vectors (the values of the tuples are the coordinates). The smaller the returned value is, the more similar the vectors are.
**Syntax**

View File

@ -146,9 +146,7 @@ struct AggregateFunctionSumData
size_t count = end - start;
const auto * end_ptr = ptr + count;
if constexpr (
(is_integer<T> && !is_big_int_v<T>)
|| (is_decimal<T> && !std::is_same_v<T, Decimal256> && !std::is_same_v<T, Decimal128>))
if constexpr ((is_integer<T> || is_decimal<T>) && !is_over_big_int<T>)
{
/// For integers we can vectorize the operation if we replace the null check using a multiplication (by 0 for null, 1 for not null)
/// https://quick-bench.com/q/MLTnfTvwC2qZFVeWHfOBR3U7a8I
@ -163,8 +161,39 @@ struct AggregateFunctionSumData
Impl::add(sum, local_sum);
return;
}
else if constexpr (is_over_big_int<T>)
{
    /// Branch for the wide integer / decimal types that are excluded from the multiplication trick above.
    /// Use a mask to discard or keep the value to reduce branch miss.
    /// Notice that for (U)Int128 or Decimal128, MaskType is Int8 instead of Int64, otherwise extra branches will be introduced by compiler (for unknown reason) and performance will be worse.
    using MaskType = std::conditional_t<sizeof(T) == 16, Int8, Int64>;
    /// masks[0] clears every bit of the value, masks[1] (-1, i.e. all bits set) keeps it unchanged.
    alignas(64) const MaskType masks[2] = {0, -1};
    T local_sum{};
    while (ptr < end_ptr)
    {
        Value v = *ptr;
        if constexpr (!add_if_zero)
        {
            /// Keep the value when the condition is non-zero; `!!` normalizes any non-zero byte to index 1.
            if constexpr (is_integer<T>)
                v &= masks[!!*condition_map];
            else
                v.value &= masks[!!*condition_map];
        }
        else
        {
            /// Keep the value when the condition is zero.
            if constexpr (is_integer<T>)
                v &= masks[!*condition_map];
            else
                v.value &= masks[!*condition_map];
        }
        /// Accumulate unconditionally: rows that must be skipped were already zeroed by the mask.
        /// (A floating-point guard here would always be false in this branch and silently drop every value.)
        Impl::add(local_sum, v);
        ++ptr;
        ++condition_map;
    }
    Impl::add(sum, local_sum);
    return;
}
else if constexpr (std::is_floating_point_v<T>)
{
/// For floating point we use a similar trick as above, except that now we reinterpret the floating point number as an unsigned
/// integer of the same size and use a mask instead (0 to discard, 0xFF..FF to keep)

View File

@ -57,6 +57,249 @@ inline bool cpuid(UInt32 op, UInt32 * res) noexcept /// NOLINT
#endif
}
/// Holds the four 32-bit values written by one `cpuid` query.
/// The same storage can be read as the raw `info` array or by name through `registers`
/// (eax, ebx, ecx, edx, in array order).
union CPUInfo
{
UInt32 info[4];
struct Registers
{
UInt32 eax;
UInt32 ebx;
UInt32 ecx;
UInt32 edx;
} registers;
/// Runs `cpuid` for `op` and stores the result in `info`. The value returned by `cpuid` is not inspected.
inline explicit CPUInfo(UInt32 op) noexcept { cpuid(op, info); }
/// Same as above, additionally passing `sub_op` to `cpuid`.
inline CPUInfo(UInt32 op, UInt32 sub_op) noexcept { cpuid(op, sub_op, info); }
};
inline bool haveRDTSCP() noexcept
{
return (CPUInfo(0x80000001).registers.edx >> 27) & 1u;
}
inline bool haveSSE() noexcept
{
return (CPUInfo(0x1).registers.edx >> 25) & 1u;
}
inline bool haveSSE2() noexcept
{
return (CPUInfo(0x1).registers.edx >> 26) & 1u;
}
inline bool haveSSE3() noexcept
{
return CPUInfo(0x1).registers.ecx & 1u;
}
inline bool havePCLMUL() noexcept
{
return (CPUInfo(0x1).registers.ecx >> 1) & 1u;
}
inline bool haveSSSE3() noexcept
{
return (CPUInfo(0x1).registers.ecx >> 9) & 1u;
}
inline bool haveSSE41() noexcept
{
return (CPUInfo(0x1).registers.ecx >> 19) & 1u;
}
inline bool haveSSE42() noexcept
{
return (CPUInfo(0x1).registers.ecx >> 20) & 1u;
}
inline bool haveF16C() noexcept
{
return (CPUInfo(0x1).registers.ecx >> 29) & 1u;
}
inline bool havePOPCNT() noexcept
{
return (CPUInfo(0x1).registers.ecx >> 23) & 1u;
}
inline bool haveAES() noexcept
{
return (CPUInfo(0x1).registers.ecx >> 25) & 1u;
}
inline bool haveXSAVE() noexcept
{
return (CPUInfo(0x1).registers.ecx >> 26) & 1u;
}
inline bool haveOSXSAVE() noexcept
{
return (CPUInfo(0x1).registers.ecx >> 27) & 1u;
}
inline bool haveAVX() noexcept
{
#if defined(__x86_64__)
// http://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-optimization-manual.pdf
// https://bugs.chromium.org/p/chromium/issues/detail?id=375968
return haveOSXSAVE() // implies haveXSAVE()
&& (our_xgetbv(0) & 6u) == 6u // XMM state and YMM state are enabled by OS
&& ((CPUInfo(0x1).registers.ecx >> 28) & 1u); // AVX bit
#else
return false;
#endif
}
inline bool haveFMA() noexcept
{
return haveAVX() && ((CPUInfo(0x1).registers.ecx >> 12) & 1u);
}
inline bool haveAVX2() noexcept
{
return haveAVX() && ((CPUInfo(0x7, 0).registers.ebx >> 5) & 1u);
}
/// BMI1 support: leaf 0x7 (sub-leaf 0), EBX bit 3.
/// Leaf 0x7 is only consulted when leaf 0x0 reports it as present, matching the other
/// leaf-0x7 feature checks in this file (RDSEED, ADX, CLWB, SHA, ...).
inline bool haveBMI1() noexcept
{
    return CPUInfo(0x0).registers.eax >= 0x7 && ((CPUInfo(0x7, 0).registers.ebx >> 3) & 1u);
}
/// BMI2 support: leaf 0x7 (sub-leaf 0), EBX bit 8.
/// Leaf 0x7 is only consulted when leaf 0x0 reports it as present, matching the other
/// leaf-0x7 feature checks in this file (RDSEED, ADX, CLWB, SHA, ...).
inline bool haveBMI2() noexcept
{
    return CPUInfo(0x0).registers.eax >= 0x7 && ((CPUInfo(0x7, 0).registers.ebx >> 8) & 1u);
}
inline bool haveAVX512F() noexcept
{
#if defined(__x86_64__)
// https://software.intel.com/en-us/articles/how-to-detect-knl-instruction-support
return haveOSXSAVE() // implies haveXSAVE()
&& (our_xgetbv(0) & 6u) == 6u // XMM state and YMM state are enabled by OS
&& ((our_xgetbv(0) >> 5) & 7u) == 7u // ZMM state is enabled by OS
&& CPUInfo(0x0).registers.eax >= 0x7 // leaf 7 is present
&& ((CPUInfo(0x7, 0).registers.ebx >> 16) & 1u); // AVX512F bit
#else
return false;
#endif
}
inline bool haveAVX512DQ() noexcept
{
return haveAVX512F() && ((CPUInfo(0x7, 0).registers.ebx >> 17) & 1u);
}
inline bool haveRDSEED() noexcept
{
return CPUInfo(0x0).registers.eax >= 0x7 && ((CPUInfo(0x7, 0).registers.ebx >> 18) & 1u);
}
inline bool haveADX() noexcept
{
return CPUInfo(0x0).registers.eax >= 0x7 && ((CPUInfo(0x7, 0).registers.ebx >> 19) & 1u);
}
inline bool haveAVX512IFMA() noexcept
{
return haveAVX512F() && ((CPUInfo(0x7, 0).registers.ebx >> 21) & 1u);
}
inline bool havePCOMMIT() noexcept
{
return CPUInfo(0x0).registers.eax >= 0x7 && ((CPUInfo(0x7, 0).registers.ebx >> 22) & 1u);
}
inline bool haveCLFLUSHOPT() noexcept
{
return CPUInfo(0x0).registers.eax >= 0x7 && ((CPUInfo(0x7, 0).registers.ebx >> 23) & 1u);
}
inline bool haveCLWB() noexcept
{
return CPUInfo(0x0).registers.eax >= 0x7 && ((CPUInfo(0x7, 0).registers.ebx >> 24) & 1u);
}
inline bool haveAVX512PF() noexcept
{
return haveAVX512F() && ((CPUInfo(0x7, 0).registers.ebx >> 26) & 1u);
}
inline bool haveAVX512ER() noexcept
{
return haveAVX512F() && ((CPUInfo(0x7, 0).registers.ebx >> 27) & 1u);
}
inline bool haveAVX512CD() noexcept
{
return haveAVX512F() && ((CPUInfo(0x7, 0).registers.ebx >> 28) & 1u);
}
inline bool haveSHA() noexcept
{
return CPUInfo(0x0).registers.eax >= 0x7 && ((CPUInfo(0x7, 0).registers.ebx >> 29) & 1u);
}
inline bool haveAVX512BW() noexcept
{
return haveAVX512F() && ((CPUInfo(0x7, 0).registers.ebx >> 30) & 1u);
}
inline bool haveAVX512VL() noexcept
{
return haveAVX512F() && ((CPUInfo(0x7, 0).registers.ebx >> 31) & 1u);
}
inline bool havePREFETCHWT1() noexcept
{
return CPUInfo(0x0).registers.eax >= 0x7 && ((CPUInfo(0x7, 0).registers.ecx >> 0) & 1u);
}
inline bool haveAVX512VBMI() noexcept
{
return haveAVX512F() && ((CPUInfo(0x7, 0).registers.ecx >> 1) & 1u);
}
inline bool haveAVX512VBMI2() noexcept
{
return haveAVX512F() && ((CPUInfo(0x7, 0).registers.ecx >> 6) & 1u);
}
/// RDRAND support: leaf 0x1, ECX bit 30.
/// NOTE(review): the guard requires leaf 0x7 to be present although the feature bit is read from leaf 0x1;
/// this looks stricter than necessary — confirm whether it is intentional.
inline bool haveRDRAND() noexcept
{
return CPUInfo(0x0).registers.eax >= 0x7 && ((CPUInfo(0x1).registers.ecx >> 30) & 1u);
}
inline bool haveAMX() noexcept
{
#if defined(__x86_64__)
// http://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-optimization-manual.pdf
return haveOSXSAVE() // implies haveXSAVE()
&& ((our_xgetbv(0) >> 17) & 0x3) == 0x3; // AMX state are enabled by OS
#else
return false;
#endif
}
inline bool haveAMXBF16() noexcept
{
return haveAMX()
&& ((CPUInfo(0x7, 0).registers.edx >> 22) & 1u); // AMX-BF16 bit
}
inline bool haveAMXTILE() noexcept
{
return haveAMX()
&& ((CPUInfo(0x7, 0).registers.edx >> 24) & 1u); // AMX-TILE bit
}
inline bool haveAMXINT8() noexcept
{
return haveAMX()
&& ((CPUInfo(0x7, 0).registers.edx >> 25) & 1u); // AMX-INT8 bit
}
#define CPU_ID_ENUMERATE(OP) \
OP(SSE) \
OP(SSE2) \
@ -98,253 +341,6 @@ inline bool cpuid(UInt32 op, UInt32 * res) noexcept /// NOLINT
OP(AMXTILE) \
OP(AMXINT8)
union CPUInfo
{
UInt32 info[4];
struct Registers
{
UInt32 eax;
UInt32 ebx;
UInt32 ecx;
UInt32 edx;
} registers;
inline explicit CPUInfo(UInt32 op) noexcept { cpuid(op, info); }
inline CPUInfo(UInt32 op, UInt32 sub_op) noexcept { cpuid(op, sub_op, info); }
};
#define DEF_NAME(X) inline bool have##X() noexcept;
CPU_ID_ENUMERATE(DEF_NAME)
#undef DEF_NAME
bool haveRDTSCP() noexcept
{
return (CPUInfo(0x80000001).registers.edx >> 27) & 1u;
}
bool haveSSE() noexcept
{
return (CPUInfo(0x1).registers.edx >> 25) & 1u;
}
bool haveSSE2() noexcept
{
return (CPUInfo(0x1).registers.edx >> 26) & 1u;
}
bool haveSSE3() noexcept
{
return CPUInfo(0x1).registers.ecx & 1u;
}
bool havePCLMUL() noexcept
{
return (CPUInfo(0x1).registers.ecx >> 1) & 1u;
}
bool haveSSSE3() noexcept
{
return (CPUInfo(0x1).registers.ecx >> 9) & 1u;
}
bool haveSSE41() noexcept
{
return (CPUInfo(0x1).registers.ecx >> 19) & 1u;
}
bool haveSSE42() noexcept
{
return (CPUInfo(0x1).registers.ecx >> 20) & 1u;
}
bool haveF16C() noexcept
{
return (CPUInfo(0x1).registers.ecx >> 29) & 1u;
}
bool havePOPCNT() noexcept
{
return (CPUInfo(0x1).registers.ecx >> 23) & 1u;
}
bool haveAES() noexcept
{
return (CPUInfo(0x1).registers.ecx >> 25) & 1u;
}
bool haveXSAVE() noexcept
{
return (CPUInfo(0x1).registers.ecx >> 26) & 1u;
}
bool haveOSXSAVE() noexcept
{
return (CPUInfo(0x1).registers.ecx >> 27) & 1u;
}
bool haveAVX() noexcept
{
#if defined(__x86_64__)
// http://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-optimization-manual.pdf
// https://bugs.chromium.org/p/chromium/issues/detail?id=375968
return haveOSXSAVE() // implies haveXSAVE()
&& (our_xgetbv(0) & 6u) == 6u // XMM state and YMM state are enabled by OS
&& ((CPUInfo(0x1).registers.ecx >> 28) & 1u); // AVX bit
#else
return false;
#endif
}
bool haveFMA() noexcept
{
return haveAVX() && ((CPUInfo(0x1).registers.ecx >> 12) & 1u);
}
bool haveAVX2() noexcept
{
return haveAVX() && ((CPUInfo(0x7, 0).registers.ebx >> 5) & 1u);
}
bool haveBMI1() noexcept
{
return (CPUInfo(0x7, 0).registers.ebx >> 3) & 1u;
}
bool haveBMI2() noexcept
{
return (CPUInfo(0x7, 0).registers.ebx >> 8) & 1u;
}
bool haveAVX512F() noexcept
{
#if defined(__x86_64__)
// https://software.intel.com/en-us/articles/how-to-detect-knl-instruction-support
return haveOSXSAVE() // implies haveXSAVE()
&& (our_xgetbv(0) & 6u) == 6u // XMM state and YMM state are enabled by OS
&& ((our_xgetbv(0) >> 5) & 7u) == 7u // ZMM state is enabled by OS
&& CPUInfo(0x0).registers.eax >= 0x7 // leaf 7 is present
&& ((CPUInfo(0x7, 0).registers.ebx >> 16) & 1u); // AVX512F bit
#else
return false;
#endif
}
bool haveAVX512DQ() noexcept
{
return haveAVX512F() && ((CPUInfo(0x7, 0).registers.ebx >> 17) & 1u);
}
bool haveRDSEED() noexcept
{
return CPUInfo(0x0).registers.eax >= 0x7 && ((CPUInfo(0x7, 0).registers.ebx >> 18) & 1u);
}
bool haveADX() noexcept
{
return CPUInfo(0x0).registers.eax >= 0x7 && ((CPUInfo(0x7, 0).registers.ebx >> 19) & 1u);
}
bool haveAVX512IFMA() noexcept
{
return haveAVX512F() && ((CPUInfo(0x7, 0).registers.ebx >> 21) & 1u);
}
bool havePCOMMIT() noexcept
{
return CPUInfo(0x0).registers.eax >= 0x7 && ((CPUInfo(0x7, 0).registers.ebx >> 22) & 1u);
}
bool haveCLFLUSHOPT() noexcept
{
return CPUInfo(0x0).registers.eax >= 0x7 && ((CPUInfo(0x7, 0).registers.ebx >> 23) & 1u);
}
bool haveCLWB() noexcept
{
return CPUInfo(0x0).registers.eax >= 0x7 && ((CPUInfo(0x7, 0).registers.ebx >> 24) & 1u);
}
bool haveAVX512PF() noexcept
{
return haveAVX512F() && ((CPUInfo(0x7, 0).registers.ebx >> 26) & 1u);
}
bool haveAVX512ER() noexcept
{
return haveAVX512F() && ((CPUInfo(0x7, 0).registers.ebx >> 27) & 1u);
}
bool haveAVX512CD() noexcept
{
return haveAVX512F() && ((CPUInfo(0x7, 0).registers.ebx >> 28) & 1u);
}
bool haveSHA() noexcept
{
return CPUInfo(0x0).registers.eax >= 0x7 && ((CPUInfo(0x7, 0).registers.ebx >> 29) & 1u);
}
bool haveAVX512BW() noexcept
{
return haveAVX512F() && ((CPUInfo(0x7, 0).registers.ebx >> 30) & 1u);
}
bool haveAVX512VL() noexcept
{
return haveAVX512F() && ((CPUInfo(0x7, 0).registers.ebx >> 31) & 1u);
}
bool havePREFETCHWT1() noexcept
{
return CPUInfo(0x0).registers.eax >= 0x7 && ((CPUInfo(0x7, 0).registers.ecx >> 0) & 1u);
}
bool haveAVX512VBMI() noexcept
{
return haveAVX512F() && ((CPUInfo(0x7, 0).registers.ecx >> 1) & 1u);
}
bool haveAVX512VBMI2() noexcept
{
return haveAVX512F() && ((CPUInfo(0x7, 0).registers.ecx >> 6) & 1u);
}
bool haveRDRAND() noexcept
{
return CPUInfo(0x0).registers.eax >= 0x7 && ((CPUInfo(0x1).registers.ecx >> 30) & 1u);
}
inline bool haveAMX() noexcept
{
#if defined(__x86_64__)
// http://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-optimization-manual.pdf
return haveOSXSAVE() // implies haveXSAVE()
&& ((our_xgetbv(0) >> 17) & 0x3) == 0x3; // AMX state are enabled by OS
#else
return false;
#endif
}
bool haveAMXBF16() noexcept
{
return haveAMX()
&& ((CPUInfo(0x7, 0).registers.edx >> 22) & 1u); // AMX-BF16 bit
}
bool haveAMXTILE() noexcept
{
return haveAMX()
&& ((CPUInfo(0x7, 0).registers.edx >> 24) & 1u); // AMX-TILE bit
}
bool haveAMXINT8() noexcept
{
return haveAMX()
&& ((CPUInfo(0x7, 0).registers.edx >> 25) & 1u); // AMX-INT8 bit
}
struct CPUFlagsCache
{
#define DEF_NAME(X) static inline bool have_##X = have##X();

View File

@ -427,9 +427,7 @@ TEST(AsyncLoader, CancelExecutingTask)
}
}
// This test is disabled due to `MemorySanitizer: use-of-uninitialized-value` issue in `collectSymbolsFromProgramHeaders` function
// More details: https://github.com/ClickHouse/ClickHouse/pull/48923#issuecomment-1545415482
TEST(AsyncLoader, DISABLED_JobFailure)
TEST(AsyncLoader, JobFailure)
{
AsyncLoaderTest t;
t.loader.start();

View File

@ -85,6 +85,7 @@ namespace SettingsChangesHistory
static std::map<ClickHouseVersion, SettingsChangesHistory::SettingsChanges> settings_changes_history =
{
{"24.2", {
{"output_format_values_escape_quote_with_quote", false, false, "If true escape ' with '', otherwise quoted with \\'"},
{"input_format_try_infer_exponent_floats", true, false, "Don't infer floats in exponential notation by default"},
{"async_insert_max_data_size", 1000000, 10485760, "The previous value appeared to be too small."},
{"async_insert_poll_timeout_ms", 10, 10, "Timeout in milliseconds for polling data from asynchronous insert queue"},

View File

@ -1,9 +1,9 @@
#include <Columns/ColumnTuple.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeInterval.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypeTuple.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypeNothing.h>
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionHelpers.h>
#include <Functions/ITupleFunction.h>
@ -1364,11 +1364,11 @@ public:
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
{
if (getReturnTypeImpl(arguments)->isNullable())
{
return DataTypeNullable(std::make_shared<DataTypeNothing>())
.createColumnConstWithDefaultValue(input_rows_count);
}
/// TODO: cosineDistance does not support nullable arguments
/// https://github.com/ClickHouse/ClickHouse/pull/27933#issuecomment-916670286
auto return_type = getReturnTypeImpl(arguments);
if (return_type->isNullable())
return return_type->createColumnConstWithDefaultValue(input_rows_count);
FunctionDotProduct dot(context);
ColumnWithTypeAndName dot_result{dot.executeImpl(arguments, DataTypePtr(), input_rows_count),

View File

@ -722,7 +722,7 @@ static std::tuple<ASTPtr, BlockIO> executeQueryImpl(
/// TODO: parser should fail early when max_query_size limit is reached.
ast = parseQuery(parser, begin, end, "", max_query_size, settings.max_parser_depth);
#ifndef NDEBUG
#if 0
/// Verify that AST formatting is consistent:
/// If you format AST, parse it back, and format it again, you get the same string.

View File

@ -347,7 +347,7 @@ const IMergeTreeDataPart::Index & IMergeTreeDataPart::getIndex() const
{
std::scoped_lock lock(index_mutex);
if (!index_loaded)
loadIndex(lock);
loadIndex();
index_loaded = true;
return index;
}
@ -569,6 +569,7 @@ void IMergeTreeDataPart::removeIfNeeded()
UInt64 IMergeTreeDataPart::getIndexSizeInBytes() const
{
std::scoped_lock lock(index_mutex);
UInt64 res = 0;
for (const ColumnPtr & column : index)
res += column->byteSize();
@ -577,6 +578,7 @@ UInt64 IMergeTreeDataPart::getIndexSizeInBytes() const
UInt64 IMergeTreeDataPart::getIndexSizeInAllocatedBytes() const
{
std::scoped_lock lock(index_mutex);
UInt64 res = 0;
for (const ColumnPtr & column : index)
res += column->allocatedBytes();
@ -828,7 +830,7 @@ void IMergeTreeDataPart::appendFilesOfIndexGranularity(Strings & /* files */) co
{
}
void IMergeTreeDataPart::loadIndex(std::scoped_lock<std::mutex> &) const
void IMergeTreeDataPart::loadIndex() const
{
/// Memory for index must not be accounted as memory usage for query, because it belongs to a table.
MemoryTrackerBlockerInThread temporarily_disable_memory_tracker;

View File

@ -3,6 +3,7 @@
#include <IO/WriteSettings.h>
#include <Core/Block.h>
#include <base/types.h>
#include <base/defines.h>
#include <Core/NamesAndTypes.h>
#include <Storages/IStorage.h>
#include <Storages/LightweightDeleteDescription.h>
@ -565,8 +566,8 @@ protected:
/// Lazily loaded in RAM. Contains each index_granularity-th value of primary key tuple.
/// Note that marks (also correspond to primary key) are not always in RAM, but cached. See MarkCache.h.
mutable std::mutex index_mutex;
mutable Index index;
mutable bool index_loaded = false;
mutable Index index TSA_GUARDED_BY(index_mutex);
mutable bool index_loaded TSA_GUARDED_BY(index_mutex) = false;
/// Total size of all columns, calculated once in calcuateColumnSizesOnDisk
ColumnSize total_columns_size;
@ -664,7 +665,7 @@ private:
virtual void appendFilesOfIndexGranularity(Strings & files) const;
/// Loads the index file.
void loadIndex(std::scoped_lock<std::mutex> &) const;
void loadIndex() const TSA_REQUIRES(index_mutex);
void appendFilesOfIndex(Strings & files) const;

View File

@ -73,6 +73,7 @@ static void splitAndModifyMutationCommands(
LoggerPtr log)
{
auto part_columns = part->getColumnsDescription();
const auto & table_columns = metadata_snapshot->getColumns();
if (!isWidePart(part) || !isFullPartStorage(part->getDataPartStorage()))
{
@ -81,9 +82,19 @@ static void splitAndModifyMutationCommands(
for (const auto & command : commands)
{
if (command.type == MutationCommand::Type::MATERIALIZE_COLUMN)
{
/// For ordinary column with default or materialized expression, MATERIALIZE COLUMN should not override past values
/// So we only mutate column if `command.column_name` is a default/materialized column or if the part does not have physical column file
auto column_ordinary = table_columns.getOrdinary().tryGetByName(command.column_name);
if (!column_ordinary || !part->tryGetColumn(command.column_name) || !part->hasColumnFiles(*column_ordinary))
{
for_interpreter.push_back(command);
mutated_columns.emplace(command.column_name);
}
}
if (command.type == MutationCommand::Type::MATERIALIZE_INDEX
|| command.type == MutationCommand::Type::MATERIALIZE_STATISTIC
|| command.type == MutationCommand::Type::MATERIALIZE_COLUMN
|| command.type == MutationCommand::Type::MATERIALIZE_PROJECTION
|| command.type == MutationCommand::Type::MATERIALIZE_TTL
|| command.type == MutationCommand::Type::DELETE
@ -93,9 +104,6 @@ static void splitAndModifyMutationCommands(
for_interpreter.push_back(command);
for (const auto & [column_name, expr] : command.column_to_update_expression)
mutated_columns.emplace(column_name);
if (command.type == MutationCommand::Type::MATERIALIZE_COLUMN)
mutated_columns.emplace(command.column_name);
}
else if (command.type == MutationCommand::Type::DROP_INDEX
|| command.type == MutationCommand::Type::DROP_PROJECTION
@ -205,8 +213,15 @@ static void splitAndModifyMutationCommands(
{
for (const auto & command : commands)
{
if (command.type == MutationCommand::Type::MATERIALIZE_INDEX
|| command.type == MutationCommand::Type::MATERIALIZE_COLUMN
if (command.type == MutationCommand::Type::MATERIALIZE_COLUMN)
{
/// For ordinary column with default or materialized expression, MATERIALIZE COLUMN should not override past values
/// So we only mutate column if `command.column_name` is a default/materialized column or if the part does not have physical column file
auto column_ordinary = table_columns.getOrdinary().tryGetByName(command.column_name);
if (!column_ordinary || !part->tryGetColumn(command.column_name) || !part->hasColumnFiles(*column_ordinary))
for_interpreter.push_back(command);
}
else if (command.type == MutationCommand::Type::MATERIALIZE_INDEX
|| command.type == MutationCommand::Type::MATERIALIZE_STATISTIC
|| command.type == MutationCommand::Type::MATERIALIZE_PROJECTION
|| command.type == MutationCommand::Type::MATERIALIZE_TTL

View File

@ -17,6 +17,13 @@
<query>SELECT sumKahan(toNullable(toFloat32(number))) FROM numbers(100000000)</query>
<query>SELECT sumKahan(toNullable(toFloat64(number))) FROM numbers(100000000)</query>
<query>select sumIf(number::Decimal128(3), rand32() % 2 = 0) from numbers(100000000)</query>
<query>select sumIf(number::Decimal256(3), rand32() % 2 = 0) from numbers(100000000)</query>
<query>select sumIf(number::Int128, rand32() % 2 = 0) from numbers(100000000)</query>
<query>select sumIf(number::UInt128, rand32() % 2 = 0) from numbers(100000000)</query>
<query>select sumIf(number::Int256, rand32() % 2 = 0) from numbers(100000000)</query>
<query>select sumIf(number::UInt256, rand32() % 2 = 0) from numbers(100000000)</query>
<!-- Create a table with ~20% null values. Make it random so the branch predictor doesn't do all the work -->
<create_query>CREATE TABLE nullfloat32 (x Nullable(Float32)) ENGINE = Memory</create_query>
<fill_query>INSERT INTO nullfloat32

View File

@ -17,6 +17,7 @@ ALTER TABLE tmp MATERIALIZE COLUMN s;
ALTER TABLE tmp MODIFY COLUMN s String DEFAULT toString(x+2);
SELECT arraySort(groupArray(x)), groupArray(s) FROM tmp;
ALTER TABLE tmp CLEAR COLUMN s; -- Need to clear because MATERIALIZE COLUMN won't override past values;
ALTER TABLE tmp MATERIALIZE COLUMN s;
ALTER TABLE tmp MODIFY COLUMN s String DEFAULT toString(x+3);
SELECT arraySort(groupArray(x)), groupArray(s) FROM tmp;

View File

@ -0,0 +1,45 @@
DEFAULT expressions
-- Compact parts
Before materialize
1 1
2 54321
After materialize
1 1
2 54321
-- Wide parts
Before materialize
1 1
2 54321
After materialize
1 1
2 54321
-- Nullable column != physically absent
Before materialize
1 1
2 \N
3 54321
After materialize
1 1
2 \N
3 54321
-- Parts with renamed column
Before materialize
1 1
2 54321
After rename
1 1
2 54321
After materialize
1 1
2 54321
MATERIALIZED expressions
-- Compact parts
Before materialize
1 54321
After materialize
1 65432
-- Compact parts
Before materialize
1 54321
After materialize
1 65432

View File

@ -0,0 +1,85 @@
SET mutations_sync = 2;
DROP TABLE IF EXISTS tab;
-- Tests that existing parts which contain a non-default value in columns with DEFAULT expression remain unchanged by MATERIALIZE COLUMN.
SELECT 'DEFAULT expressions';
SELECT '-- Compact parts';
CREATE TABLE tab (id Int64, dflt Int64 DEFAULT 54321) ENGINE MergeTree ORDER BY id;
INSERT INTO tab (id, dflt) VALUES (1, 1);
INSERT INTO tab (id) VALUES (2);
SELECT 'Before materialize';
SELECT * FROM tab ORDER BY id;
ALTER TABLE tab MATERIALIZE COLUMN dflt;
SELECT 'After materialize';
SELECT * FROM tab ORDER BY id;
DROP TABLE tab;
SELECT '-- Wide parts';
CREATE TABLE tab (id Int64, dflt Int64 DEFAULT 54321) ENGINE MergeTree ORDER BY id SETTINGS min_bytes_for_wide_part = 1;
INSERT INTO tab (id, dflt) VALUES (1, 1);
INSERT INTO tab (id) VALUES (2);
SELECT 'Before materialize';
SELECT * FROM tab ORDER BY id;
ALTER TABLE tab MATERIALIZE COLUMN dflt;
SELECT 'After materialize';
SELECT * FROM tab ORDER BY id;
DROP TABLE tab;
SELECT '-- Nullable column != physically absent';
CREATE TABLE tab (id Int64, dflt Nullable(Int64) DEFAULT 54321) ENGINE MergeTree ORDER BY id SETTINGS min_bytes_for_wide_part = 1;
INSERT INTO tab (id, dflt) VALUES (1, 1);
INSERT INTO tab (id, dflt) VALUES (2, NULL);
INSERT INTO tab (id) VALUES (3);
SELECT 'Before materialize';
SELECT * FROM tab ORDER BY id;
ALTER TABLE tab MATERIALIZE COLUMN dflt;
SELECT 'After materialize';
SELECT * FROM tab ORDER BY id;
DROP TABLE tab;
SELECT '-- Parts with renamed column';
CREATE TABLE tab (id Int64, dflt Int64 DEFAULT 54321) ENGINE MergeTree ORDER BY id;
INSERT INTO tab (id, dflt) VALUES (1, 1);
INSERT INTO tab (id) VALUES (2);
SELECT 'Before materialize';
SELECT * FROM tab ORDER BY id;
ALTER TABLE tab RENAME COLUMN dflt TO dflt2;
SELECT 'After rename';
SELECT * FROM tab ORDER BY id;
ALTER TABLE tab MATERIALIZE COLUMN dflt2;
SELECT 'After materialize';
SELECT * FROM tab ORDER BY id;
DROP TABLE tab;
-- But for columns with MATERIALIZED expression, all existing parts should be rewritten in case a new expression was set in the meantime.
SELECT 'MATERIALIZED expressions';
SELECT '-- Compact parts';
CREATE TABLE tab (id Int64, mtrl Int64 MATERIALIZED 54321) ENGINE MergeTree ORDER BY id;
INSERT INTO tab (id) VALUES (1);
SELECT 'Before materialize';
SELECT id, mtrl FROM tab ORDER BY id;
ALTER TABLE tab MODIFY COLUMN mtrl Int64 MATERIALIZED 65432;
ALTER TABLE tab MATERIALIZE COLUMN mtrl;
SELECT 'After materialize';
SELECT id, mtrl FROM tab ORDER BY id;
DROP TABLE tab;
SELECT '-- Compact parts';
CREATE TABLE tab (id Int64, mtrl Int64 MATERIALIZED 54321) ENGINE MergeTree ORDER BY id SETTINGS min_bytes_for_wide_part = 1;
INSERT INTO tab (id) VALUES (1);
SELECT 'Before materialize';
SELECT id, mtrl FROM tab ORDER BY id;
ALTER TABLE tab MODIFY COLUMN mtrl Int64 MATERIALIZED 65432;
ALTER TABLE tab MATERIALIZE COLUMN mtrl;
SELECT 'After materialize';
SELECT id, mtrl FROM tab ORDER BY id;
DROP TABLE tab;

View File

@ -0,0 +1,12 @@
49500
49500
49500
49500
49500
49500
450000
450000
450000
450000
450000
450000

View File

@ -0,0 +1,14 @@
select sumIf(number::Int128, number % 10 == 0) from numbers(1000);
select sumIf(number::UInt128, number % 10 == 0) from numbers(1000);
select sumIf(number::Int256, number % 10 == 0) from numbers(1000);
select sumIf(number::UInt256, number % 10 == 0) from numbers(1000);
select sumIf(number::Decimal128(3), number % 10 == 0) from numbers(1000);
select sumIf(number::Decimal256(3), number % 10 == 0) from numbers(1000);
-- Test when the condition is neither 0 nor 1
select sumIf(number::Int128, number % 10) from numbers(1000);
select sumIf(number::UInt128, number % 10) from numbers(1000);
select sumIf(number::Int256, number % 10) from numbers(1000);
select sumIf(number::UInt256, number % 10) from numbers(1000);
select sumIf(number::Decimal128(3), number % 10) from numbers(1000);
select sumIf(number::Decimal256(3), number % 10) from numbers(1000);

View File

@ -0,0 +1,11 @@
\N
\N
\N
\N
\N
\N
\N
\N
\N
\N
\N

View File

@ -0,0 +1,3 @@
-- https://github.com/ClickHouse/ClickHouse/issues/59596
SELECT cosineDistance((1, 1), (toNullable(0.5), 0.1));
SELECT cosineDistance((1, 1), (toNullable(0.5), 0.1)) from numbers(10);