Merge remote-tracking branch 'origin/master' into fix-slowdown-count-side-effect

2024-09-20 00:30:49 +00:00 · 2024-02-20 11:29:04 +00:00 · 2024-02-20 11:29:04 +00:00 · f40321f8a6
commit f40321f8a6
parent bb5a6dd8d3 9abd28625f
19 changed files with 549 additions and 278 deletions
--- a/docs/en/operations/configuration-files.md
+++ b/docs/en/operations/configuration-files.md
@ -10,11 +10,62 @@ The ClickHouse server can be configured with configuration files in XML or YAML

 It is possible to mix XML and YAML configuration files, for example you could have a main configuration file `config.xml` and additional configuration files `config.d/network.xml`, `config.d/timezone.yaml` and `config.d/keeper.yaml`. Mixing XML and YAML within a single configuration file is not supported. XML configuration files should use `<clickhouse>...</clickhouse>` as top-level tag. In YAML configuration files, `clickhouse:` is optional, the parser inserts it implicitly if absent.

-## Overriding Configuration {#override}
+## Merging Configuration {#merging}

-The merge of configuration files behaves as one intuitively expects: The contents of both files are combined recursively, children with the same name are replaced by the element of the more specific configuration file. The merge can be customized using attributes `replace` and `remove`.
- Attribute `replace` means that the element is replaced by the specified one.
- Attribute `remove` means that the element is deleted.
+Two configuration files (usually the main configuration file and another configuration file from `config.d/`) are merged as follows:
+
+- If a node (i.e. a path leading to an element) appears in both files and does not have attributes `replace` or `remove`, it is included in the merged configuration file and children from both nodes are included and merged recursively.
+- If one of the two nodes contains attribute `replace`, it is included in the merged configuration file but only children from the node with attribute `replace` are included.
+- If one of the two nodes contains attribute `remove`, the node is not included in the merged configuration file (if it exists already, it is deleted).
+
+Example:
+
+
+```xml
+<!-- config.xml -->
+<clickhouse>
+    <config_a>
+        <setting_1>1</setting_1>
+    </config_a>
+    <config_b>
+        <setting_2>2</setting_2>
+    </config_b>
+    <config_c>
+        <setting_3>3</setting_3>
+    </config_c>
+</clickhouse>
+```
+
+and
+
+```xml
+<!-- config.d/other_config.xml -->
+<clickhouse>
+    <config_a>
+        <setting_4>4</setting_4>
+    </config_a>
+    <config_b replace="replace">
+        <setting_5>5</setting_5>
+    </config_b>
+    <config_c remove="remove">
+        <setting_6>6</setting_6>
+    </config_c>
+</clickhouse>
+```
+
+generates the merged configuration file:
+
+```xml
+<clickhouse>
+    <config_a>
+        <setting_1>1</setting_1>
+        <setting_4>4</setting_4>
+    </config_a>
+    <config_b>
+        <setting_5>5</setting_5>
+    </config_b>
+</clickhouse>
+```

 To specify that a value of an element should be replaced by the value of an environment variable, you can use attribute `from_env`.

@ -125,7 +176,7 @@ Users configuration can be split into separate files similar to `config.xml` and
 Directory name is defined as `users_config` setting without `.xml` postfix concatenated with `.d`.
 Directory `users.d` is used by default, as `users_config` defaults to `users.xml`.

-Note that configuration files are first merged taking into account [Override](#override) settings and includes are processed after that.
+Note that configuration files are first [merged](#merging), taking into account the `replace` and `remove` attributes, and includes are processed after that.

 ## XML example {#example}

--- a/docs/en/sql-reference/functions/distance-functions.md
+++ b/docs/en/sql-reference/functions/distance-functions.md
@ -509,7 +509,7 @@ Result:

 ## cosineDistance

-Calculates the cosine distance between two vectors (the values of the tuples are the coordinates). The less the returned value is, the more similar are the vectors.
+Calculates the cosine distance between two vectors (the values of the tuples are the coordinates). The smaller the returned value is, the more similar the vectors are.

 **Syntax**

--- a/src/AggregateFunctions/AggregateFunctionSum.h
+++ b/src/AggregateFunctions/AggregateFunctionSum.h
@ -146,9 +146,7 @@ struct AggregateFunctionSumData
        size_t count = end - start;
        const auto * end_ptr = ptr + count;

-        if constexpr (
-            (is_integer<T> && !is_big_int_v<T>)
-            || (is_decimal<T> && !std::is_same_v<T, Decimal256> && !std::is_same_v<T, Decimal128>))
+        if constexpr ((is_integer<T> || is_decimal<T>) && !is_over_big_int<T>)
        {
            /// For integers we can vectorize the operation if we replace the null check using a multiplication (by 0 for null, 1 for not null)
            /// https://quick-bench.com/q/MLTnfTvwC2qZFVeWHfOBR3U7a8I
@ -163,8 +161,39 @@ struct AggregateFunctionSumData
            Impl::add(sum, local_sum);
            return;
        }
+        else if constexpr (is_over_big_int<T>)
+        {
+            /// Use a mask to discard or keep the value to reduce branch misses.
+            /// Notice that for (U)Int128 or Decimal128, MaskType is Int8 instead of Int64, otherwise extra branches will be introduced by compiler (for unknown reason) and performance will be worse.
+            using MaskType = std::conditional_t<sizeof(T) == 16, Int8, Int64>;
+            alignas(64) const MaskType masks[2] = {0, -1};
+            T local_sum{};
+            while (ptr < end_ptr)
+            {
+                Value v = *ptr;
+                if constexpr (!add_if_zero)
+                {
+                    if constexpr (is_integer<T>)
+                        v &= masks[!!*condition_map];
+                    else
+                        v.value &= masks[!!*condition_map];
+                }
+                else
+                {
+                    if constexpr (is_integer<T>)
+                        v &= masks[!*condition_map];
+                    else
+                        v.value &= masks[!*condition_map];
+                }

-        if constexpr (std::is_floating_point_v<T>)
+                Impl::add(local_sum, v);
+                ++ptr;
+                ++condition_map;
+            }
+            Impl::add(sum, local_sum);
+            return;
+        }
+        else if constexpr (std::is_floating_point_v<T>)
        {
            /// For floating point we use a similar trick as above, except that now we  reinterpret the floating point number as an unsigned
            /// integer of the same size and use a mask instead (0 to discard, 0xFF..FF to keep)
--- a/src/Common/CPUID.h
+++ b/src/Common/CPUID.h
@ -57,6 +57,249 @@ inline bool cpuid(UInt32 op, UInt32 * res) noexcept /// NOLINT
 #endif
 }

+union CPUInfo
+{
+    UInt32 info[4];
+
+    struct Registers
+    {
+        UInt32 eax;
+        UInt32 ebx;
+        UInt32 ecx;
+        UInt32 edx;
+    } registers;
+
+    inline explicit CPUInfo(UInt32 op) noexcept { cpuid(op, info); }
+
+    inline CPUInfo(UInt32 op, UInt32 sub_op) noexcept { cpuid(op, sub_op, info); }
+};
+
+inline bool haveRDTSCP() noexcept
+{
+    return (CPUInfo(0x80000001).registers.edx >> 27) & 1u;
+}
+
+inline bool haveSSE() noexcept
+{
+    return (CPUInfo(0x1).registers.edx >> 25) & 1u;
+}
+
+inline bool haveSSE2() noexcept
+{
+    return (CPUInfo(0x1).registers.edx >> 26) & 1u;
+}
+
+inline bool haveSSE3() noexcept
+{
+    return CPUInfo(0x1).registers.ecx & 1u;
+}
+
+inline bool havePCLMUL() noexcept
+{
+    return (CPUInfo(0x1).registers.ecx >> 1) & 1u;
+}
+
+inline bool haveSSSE3() noexcept
+{
+    return (CPUInfo(0x1).registers.ecx >> 9) & 1u;
+}
+
+inline bool haveSSE41() noexcept
+{
+    return (CPUInfo(0x1).registers.ecx >> 19) & 1u;
+}
+
+inline bool haveSSE42() noexcept
+{
+    return (CPUInfo(0x1).registers.ecx >> 20) & 1u;
+}
+
+inline bool haveF16C() noexcept
+{
+    return (CPUInfo(0x1).registers.ecx >> 29) & 1u;
+}
+
+inline bool havePOPCNT() noexcept
+{
+    return (CPUInfo(0x1).registers.ecx >> 23) & 1u;
+}
+
+inline bool haveAES() noexcept
+{
+    return (CPUInfo(0x1).registers.ecx >> 25) & 1u;
+}
+
+inline bool haveXSAVE() noexcept
+{
+    return (CPUInfo(0x1).registers.ecx >> 26) & 1u;
+}
+
+inline bool haveOSXSAVE() noexcept
+{
+    return (CPUInfo(0x1).registers.ecx >> 27) & 1u;
+}
+
+inline bool haveAVX() noexcept
+{
+#if defined(__x86_64__)
+    // http://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-optimization-manual.pdf
+    // https://bugs.chromium.org/p/chromium/issues/detail?id=375968
+    return haveOSXSAVE()                           // implies haveXSAVE()
+           && (our_xgetbv(0) & 6u) == 6u              // XMM state and YMM state are enabled by OS
+           && ((CPUInfo(0x1).registers.ecx >> 28) & 1u); // AVX bit
+#else
+    return false;
+#endif
+}
+
+inline bool haveFMA() noexcept
+{
+    return haveAVX() && ((CPUInfo(0x1).registers.ecx >> 12) & 1u);
+}
+
+inline bool haveAVX2() noexcept
+{
+    return haveAVX() && ((CPUInfo(0x7, 0).registers.ebx >> 5) & 1u);
+}
+
+inline bool haveBMI1() noexcept
+{
+    return (CPUInfo(0x7, 0).registers.ebx >> 3) & 1u;
+}
+
+inline bool haveBMI2() noexcept
+{
+    return (CPUInfo(0x7, 0).registers.ebx >> 8) & 1u;
+}
+
+inline bool haveAVX512F() noexcept
+{
+#if defined(__x86_64__)
+    // https://software.intel.com/en-us/articles/how-to-detect-knl-instruction-support
+    return haveOSXSAVE()                           // implies haveXSAVE()
+           && (our_xgetbv(0) & 6u) == 6u              // XMM state and YMM state are enabled by OS
+           && ((our_xgetbv(0) >> 5) & 7u) == 7u       // ZMM state is enabled by OS
+           && CPUInfo(0x0).registers.eax >= 0x7          // leaf 7 is present
+           && ((CPUInfo(0x7, 0).registers.ebx >> 16) & 1u); // AVX512F bit
+#else
+    return false;
+#endif
+}
+
+inline bool haveAVX512DQ() noexcept
+{
+    return haveAVX512F() && ((CPUInfo(0x7, 0).registers.ebx >> 17) & 1u);
+}
+
+inline bool haveRDSEED() noexcept
+{
+    return CPUInfo(0x0).registers.eax >= 0x7 && ((CPUInfo(0x7, 0).registers.ebx >> 18) & 1u);
+}
+
+inline bool haveADX() noexcept
+{
+    return CPUInfo(0x0).registers.eax >= 0x7 && ((CPUInfo(0x7, 0).registers.ebx >> 19) & 1u);
+}
+
+inline bool haveAVX512IFMA() noexcept
+{
+    return haveAVX512F() && ((CPUInfo(0x7, 0).registers.ebx >> 21) & 1u);
+}
+
+inline bool havePCOMMIT() noexcept
+{
+    return CPUInfo(0x0).registers.eax >= 0x7 && ((CPUInfo(0x7, 0).registers.ebx >> 22) & 1u);
+}
+
+inline bool haveCLFLUSHOPT() noexcept
+{
+    return CPUInfo(0x0).registers.eax >= 0x7 && ((CPUInfo(0x7, 0).registers.ebx >> 23) & 1u);
+}
+
+inline bool haveCLWB() noexcept
+{
+    return CPUInfo(0x0).registers.eax >= 0x7 && ((CPUInfo(0x7, 0).registers.ebx >> 24) & 1u);
+}
+
+inline bool haveAVX512PF() noexcept
+{
+    return haveAVX512F() && ((CPUInfo(0x7, 0).registers.ebx >> 26) & 1u);
+}
+
+inline bool haveAVX512ER() noexcept
+{
+    return haveAVX512F() && ((CPUInfo(0x7, 0).registers.ebx >> 27) & 1u);
+}
+
+inline bool haveAVX512CD() noexcept
+{
+    return haveAVX512F() && ((CPUInfo(0x7, 0).registers.ebx >> 28) & 1u);
+}
+
+inline bool haveSHA() noexcept
+{
+    return CPUInfo(0x0).registers.eax >= 0x7 && ((CPUInfo(0x7, 0).registers.ebx >> 29) & 1u);
+}
+
+inline bool haveAVX512BW() noexcept
+{
+    return haveAVX512F() && ((CPUInfo(0x7, 0).registers.ebx >> 30) & 1u);
+}
+
+inline bool haveAVX512VL() noexcept
+{
+    return haveAVX512F() && ((CPUInfo(0x7, 0).registers.ebx >> 31) & 1u);
+}
+
+inline bool havePREFETCHWT1() noexcept
+{
+    return CPUInfo(0x0).registers.eax >= 0x7 && ((CPUInfo(0x7, 0).registers.ecx >> 0) & 1u);
+}
+
+inline bool haveAVX512VBMI() noexcept
+{
+    return haveAVX512F() && ((CPUInfo(0x7, 0).registers.ecx >> 1) & 1u);
+}
+
+inline bool haveAVX512VBMI2() noexcept
+{
+    return haveAVX512F() && ((CPUInfo(0x7, 0).registers.ecx >> 6) & 1u);
+}
+
+inline bool haveRDRAND() noexcept
+{
+    return CPUInfo(0x0).registers.eax >= 0x7 && ((CPUInfo(0x1).registers.ecx >> 30) & 1u);
+}
+
+inline bool haveAMX() noexcept
+{
+#if defined(__x86_64__)
+    // http://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-optimization-manual.pdf
+    return haveOSXSAVE()                           // implies haveXSAVE()
+           && ((our_xgetbv(0) >> 17) & 0x3) == 0x3;        // AMX state are enabled by OS
+#else
+    return false;
+#endif
+}
+
+inline bool haveAMXBF16() noexcept
+{
+    return haveAMX()
+            && ((CPUInfo(0x7, 0).registers.edx >> 22) & 1u);  // AMX-BF16 bit
+}
+
+inline bool haveAMXTILE() noexcept
+{
+    return haveAMX()
+            && ((CPUInfo(0x7, 0).registers.edx >> 24) & 1u);  // AMX-TILE bit
+}
+
+inline bool haveAMXINT8() noexcept
+{
+    return haveAMX()
+            && ((CPUInfo(0x7, 0).registers.edx >> 25) & 1u);  // AMX-INT8 bit
+}
+
 #define CPU_ID_ENUMERATE(OP) \
    OP(SSE)                  \
    OP(SSE2)                 \
@ -98,253 +341,6 @@ inline bool cpuid(UInt32 op, UInt32 * res) noexcept /// NOLINT
    OP(AMXTILE)              \
    OP(AMXINT8)

-union CPUInfo
-{
-    UInt32 info[4];
-
-    struct Registers
-    {
-        UInt32 eax;
-        UInt32 ebx;
-        UInt32 ecx;
-        UInt32 edx;
-    } registers;
-
-    inline explicit CPUInfo(UInt32 op) noexcept { cpuid(op, info); }
-
-    inline CPUInfo(UInt32 op, UInt32 sub_op) noexcept { cpuid(op, sub_op, info); }
-};
-
-#define DEF_NAME(X) inline bool have##X() noexcept;
-    CPU_ID_ENUMERATE(DEF_NAME)
-#undef DEF_NAME
-
-bool haveRDTSCP() noexcept
-{
-    return (CPUInfo(0x80000001).registers.edx >> 27) & 1u;
-}
-
-bool haveSSE() noexcept
-{
-    return (CPUInfo(0x1).registers.edx >> 25) & 1u;
-}
-
-bool haveSSE2() noexcept
-{
-    return (CPUInfo(0x1).registers.edx >> 26) & 1u;
-}
-
-bool haveSSE3() noexcept
-{
-    return CPUInfo(0x1).registers.ecx & 1u;
-}
-
-bool havePCLMUL() noexcept
-{
-    return (CPUInfo(0x1).registers.ecx >> 1) & 1u;
-}
-
-bool haveSSSE3() noexcept
-{
-    return (CPUInfo(0x1).registers.ecx >> 9) & 1u;
-}
-
-bool haveSSE41() noexcept
-{
-    return (CPUInfo(0x1).registers.ecx >> 19) & 1u;
-}
-
-bool haveSSE42() noexcept
-{
-    return (CPUInfo(0x1).registers.ecx >> 20) & 1u;
-}
-
-bool haveF16C() noexcept
-{
-    return (CPUInfo(0x1).registers.ecx >> 29) & 1u;
-}
-
-bool havePOPCNT() noexcept
-{
-    return (CPUInfo(0x1).registers.ecx >> 23) & 1u;
-}
-
-bool haveAES() noexcept
-{
-    return (CPUInfo(0x1).registers.ecx >> 25) & 1u;
-}
-
-bool haveXSAVE() noexcept
-{
-    return (CPUInfo(0x1).registers.ecx >> 26) & 1u;
-}
-
-bool haveOSXSAVE() noexcept
-{
-    return (CPUInfo(0x1).registers.ecx >> 27) & 1u;
-}
-
-bool haveAVX() noexcept
-{
-#if defined(__x86_64__)
-    // http://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-optimization-manual.pdf
-    // https://bugs.chromium.org/p/chromium/issues/detail?id=375968
-    return haveOSXSAVE()                           // implies haveXSAVE()
-           && (our_xgetbv(0) & 6u) == 6u              // XMM state and YMM state are enabled by OS
-           && ((CPUInfo(0x1).registers.ecx >> 28) & 1u); // AVX bit
-#else
-    return false;
-#endif
-}
-
-bool haveFMA() noexcept
-{
-    return haveAVX() && ((CPUInfo(0x1).registers.ecx >> 12) & 1u);
-}
-
-bool haveAVX2() noexcept
-{
-    return haveAVX() && ((CPUInfo(0x7, 0).registers.ebx >> 5) & 1u);
-}
-
-bool haveBMI1() noexcept
-{
-    return (CPUInfo(0x7, 0).registers.ebx >> 3) & 1u;
-}
-
-bool haveBMI2() noexcept
-{
-    return (CPUInfo(0x7, 0).registers.ebx >> 8) & 1u;
-}
-
-bool haveAVX512F() noexcept
-{
-#if defined(__x86_64__)
-    // https://software.intel.com/en-us/articles/how-to-detect-knl-instruction-support
-    return haveOSXSAVE()                           // implies haveXSAVE()
-           && (our_xgetbv(0) & 6u) == 6u              // XMM state and YMM state are enabled by OS
-           && ((our_xgetbv(0) >> 5) & 7u) == 7u       // ZMM state is enabled by OS
-           && CPUInfo(0x0).registers.eax >= 0x7          // leaf 7 is present
-           && ((CPUInfo(0x7, 0).registers.ebx >> 16) & 1u); // AVX512F bit
-#else
-    return false;
-#endif
-}
-
-bool haveAVX512DQ() noexcept
-{
-    return haveAVX512F() && ((CPUInfo(0x7, 0).registers.ebx >> 17) & 1u);
-}
-
-bool haveRDSEED() noexcept
-{
-    return CPUInfo(0x0).registers.eax >= 0x7 && ((CPUInfo(0x7, 0).registers.ebx >> 18) & 1u);
-}
-
-bool haveADX() noexcept
-{
-    return CPUInfo(0x0).registers.eax >= 0x7 && ((CPUInfo(0x7, 0).registers.ebx >> 19) & 1u);
-}
-
-bool haveAVX512IFMA() noexcept
-{
-    return haveAVX512F() && ((CPUInfo(0x7, 0).registers.ebx >> 21) & 1u);
-}
-
-bool havePCOMMIT() noexcept
-{
-    return CPUInfo(0x0).registers.eax >= 0x7 && ((CPUInfo(0x7, 0).registers.ebx >> 22) & 1u);
-}
-
-bool haveCLFLUSHOPT() noexcept
-{
-    return CPUInfo(0x0).registers.eax >= 0x7 && ((CPUInfo(0x7, 0).registers.ebx >> 23) & 1u);
-}
-
-bool haveCLWB() noexcept
-{
-    return CPUInfo(0x0).registers.eax >= 0x7 && ((CPUInfo(0x7, 0).registers.ebx >> 24) & 1u);
-}
-
-bool haveAVX512PF() noexcept
-{
-    return haveAVX512F() && ((CPUInfo(0x7, 0).registers.ebx >> 26) & 1u);
-}
-
-bool haveAVX512ER() noexcept
-{
-    return haveAVX512F() && ((CPUInfo(0x7, 0).registers.ebx >> 27) & 1u);
-}
-
-bool haveAVX512CD() noexcept
-{
-    return haveAVX512F() && ((CPUInfo(0x7, 0).registers.ebx >> 28) & 1u);
-}
-
-bool haveSHA() noexcept
-{
-    return CPUInfo(0x0).registers.eax >= 0x7 && ((CPUInfo(0x7, 0).registers.ebx >> 29) & 1u);
-}
-
-bool haveAVX512BW() noexcept
-{
-    return haveAVX512F() && ((CPUInfo(0x7, 0).registers.ebx >> 30) & 1u);
-}
-
-bool haveAVX512VL() noexcept
-{
-    return haveAVX512F() && ((CPUInfo(0x7, 0).registers.ebx >> 31) & 1u);
-}
-
-bool havePREFETCHWT1() noexcept
-{
-    return CPUInfo(0x0).registers.eax >= 0x7 && ((CPUInfo(0x7, 0).registers.ecx >> 0) & 1u);
-}
-
-bool haveAVX512VBMI() noexcept
-{
-    return haveAVX512F() && ((CPUInfo(0x7, 0).registers.ecx >> 1) & 1u);
-}
-
-bool haveAVX512VBMI2() noexcept
-{
-    return haveAVX512F() && ((CPUInfo(0x7, 0).registers.ecx >> 6) & 1u);
-}
-
-bool haveRDRAND() noexcept
-{
-    return CPUInfo(0x0).registers.eax >= 0x7 && ((CPUInfo(0x1).registers.ecx >> 30) & 1u);
-}
-
-inline bool haveAMX() noexcept
-{
-#if defined(__x86_64__)
-    // http://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-optimization-manual.pdf
-    return haveOSXSAVE()                           // implies haveXSAVE()
-           && ((our_xgetbv(0) >> 17) & 0x3) == 0x3;        // AMX state are enabled by OS
-#else
-    return false;
-#endif
-}
-
-bool haveAMXBF16() noexcept
-{
-    return haveAMX()
-            && ((CPUInfo(0x7, 0).registers.edx >> 22) & 1u);  // AMX-BF16 bit
-}
-
-bool haveAMXTILE() noexcept
-{
-    return haveAMX()
-            && ((CPUInfo(0x7, 0).registers.edx >> 24) & 1u);  // AMX-TILE bit
-}
-
-bool haveAMXINT8() noexcept
-{
-    return haveAMX()
-            && ((CPUInfo(0x7, 0).registers.edx >> 25) & 1u);  // AMX-INT8 bit
-}
-
 struct CPUFlagsCache
 {
 #define DEF_NAME(X) static inline bool have_##X = have##X();
--- a/src/Common/tests/gtest_async_loader.cpp
+++ b/src/Common/tests/gtest_async_loader.cpp
@ -427,9 +427,7 @@ TEST(AsyncLoader, CancelExecutingTask)
    }
 }

-// This test is disabled due to `MemorySanitizer: use-of-uninitialized-value` issue in `collectSymbolsFromProgramHeaders` function
-// More details: https://github.com/ClickHouse/ClickHouse/pull/48923#issuecomment-1545415482
-TEST(AsyncLoader, DISABLED_JobFailure)
+TEST(AsyncLoader, JobFailure)
 {
    AsyncLoaderTest t;
    t.loader.start();
--- a/src/Core/SettingsChangesHistory.h
+++ b/src/Core/SettingsChangesHistory.h
@ -85,6 +85,7 @@ namespace SettingsChangesHistory
 static std::map<ClickHouseVersion, SettingsChangesHistory::SettingsChanges> settings_changes_history =
 {
    {"24.2", {
+              {"output_format_values_escape_quote_with_quote", false, false, "If true escape ' with '', otherwise quoted with \\'"},
              {"input_format_try_infer_exponent_floats", true, false, "Don't infer floats in exponential notation by default"},
              {"async_insert_max_data_size", 1000000, 10485760, "The previous value appeared to be too small."},
              {"async_insert_poll_timeout_ms", 10, 10, "Timeout in milliseconds for polling data from asynchronous insert queue"},
--- a/src/Functions/vectorFunctions.cpp
+++ b/src/Functions/vectorFunctions.cpp
@ -1,9 +1,9 @@
 #include <Columns/ColumnTuple.h>
 #include <DataTypes/DataTypeArray.h>
 #include <DataTypes/DataTypeInterval.h>
+#include <DataTypes/DataTypeNullable.h>
 #include <DataTypes/DataTypeTuple.h>
 #include <DataTypes/DataTypesNumber.h>
-#include <DataTypes/DataTypeNothing.h>
 #include <Functions/FunctionFactory.h>
 #include <Functions/FunctionHelpers.h>
 #include <Functions/ITupleFunction.h>
@ -1364,11 +1364,11 @@ public:

    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
    {
-        if (getReturnTypeImpl(arguments)->isNullable())
-        {
-            return DataTypeNullable(std::make_shared<DataTypeNothing>())
-                   .createColumnConstWithDefaultValue(input_rows_count);
-        }
+        /// TODO: cosineDistance does not support nullable arguments
+        /// https://github.com/ClickHouse/ClickHouse/pull/27933#issuecomment-916670286
+        auto return_type = getReturnTypeImpl(arguments);
+        if (return_type->isNullable())
+            return return_type->createColumnConstWithDefaultValue(input_rows_count);

        FunctionDotProduct dot(context);
        ColumnWithTypeAndName dot_result{dot.executeImpl(arguments, DataTypePtr(), input_rows_count),
--- a/src/Interpreters/executeQuery.cpp
+++ b/src/Interpreters/executeQuery.cpp
@ -722,7 +722,7 @@ static std::tuple<ASTPtr, BlockIO> executeQueryImpl(
            /// TODO: parser should fail early when max_query_size limit is reached.
            ast = parseQuery(parser, begin, end, "", max_query_size, settings.max_parser_depth);

-#ifndef NDEBUG
+#if 0
            /// Verify that AST formatting is consistent:
            /// If you format AST, parse it back, and format it again, you get the same string.

--- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp
+++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp
@ -347,7 +347,7 @@ const IMergeTreeDataPart::Index & IMergeTreeDataPart::getIndex() const
 {
    std::scoped_lock lock(index_mutex);
    if (!index_loaded)
-        loadIndex(lock);
+        loadIndex();
    index_loaded = true;
    return index;
 }
@ -569,6 +569,7 @@ void IMergeTreeDataPart::removeIfNeeded()

 UInt64 IMergeTreeDataPart::getIndexSizeInBytes() const
 {
+    std::scoped_lock lock(index_mutex);
    UInt64 res = 0;
    for (const ColumnPtr & column : index)
        res += column->byteSize();
@ -577,6 +578,7 @@ UInt64 IMergeTreeDataPart::getIndexSizeInBytes() const

 UInt64 IMergeTreeDataPart::getIndexSizeInAllocatedBytes() const
 {
+    std::scoped_lock lock(index_mutex);
    UInt64 res = 0;
    for (const ColumnPtr & column : index)
        res += column->allocatedBytes();
@ -828,7 +830,7 @@ void IMergeTreeDataPart::appendFilesOfIndexGranularity(Strings & /* files */) co
 {
 }

-void IMergeTreeDataPart::loadIndex(std::scoped_lock<std::mutex> &) const
+void IMergeTreeDataPart::loadIndex() const
 {
    /// Memory for index must not be accounted as memory usage for query, because it belongs to a table.
    MemoryTrackerBlockerInThread temporarily_disable_memory_tracker;
--- a/src/Storages/MergeTree/IMergeTreeDataPart.h
+++ b/src/Storages/MergeTree/IMergeTreeDataPart.h
@ -3,6 +3,7 @@
 #include <IO/WriteSettings.h>
 #include <Core/Block.h>
 #include <base/types.h>
+#include <base/defines.h>
 #include <Core/NamesAndTypes.h>
 #include <Storages/IStorage.h>
 #include <Storages/LightweightDeleteDescription.h>
@ -565,8 +566,8 @@ protected:
    /// Lazily loaded in RAM. Contains each index_granularity-th value of primary key tuple.
    /// Note that marks (also correspond to primary key) are not always in RAM, but cached. See MarkCache.h.
    mutable std::mutex index_mutex;
-    mutable Index index;
-    mutable bool index_loaded = false;
+    mutable Index index TSA_GUARDED_BY(index_mutex);
+    mutable bool index_loaded TSA_GUARDED_BY(index_mutex) = false;

    /// Total size of all columns, calculated once in calcuateColumnSizesOnDisk
    ColumnSize total_columns_size;
@ -664,7 +665,7 @@ private:
    virtual void appendFilesOfIndexGranularity(Strings & files) const;

    /// Loads the index file.
-    void loadIndex(std::scoped_lock<std::mutex> &) const;
+    void loadIndex() const TSA_REQUIRES(index_mutex);

    void appendFilesOfIndex(Strings & files) const;

--- a/src/Storages/MergeTree/MutateTask.cpp
+++ b/src/Storages/MergeTree/MutateTask.cpp
@ -73,6 +73,7 @@ static void splitAndModifyMutationCommands(
    LoggerPtr log)
 {
    auto part_columns = part->getColumnsDescription();
+    const auto & table_columns = metadata_snapshot->getColumns();

    if (!isWidePart(part) || !isFullPartStorage(part->getDataPartStorage()))
    {
@ -81,9 +82,19 @@ static void splitAndModifyMutationCommands(

        for (const auto & command : commands)
        {
+            if (command.type == MutationCommand::Type::MATERIALIZE_COLUMN)
+            {
+                /// For ordinary column with default or materialized expression, MATERIALIZE COLUMN should not override past values
+                /// So we only mutate column if `command.column_name` is a default/materialized column or if the part does not have physical column file
+                auto column_ordinary = table_columns.getOrdinary().tryGetByName(command.column_name);
+                if (!column_ordinary || !part->tryGetColumn(command.column_name) || !part->hasColumnFiles(*column_ordinary))
+                {
+                    for_interpreter.push_back(command);
+                    mutated_columns.emplace(command.column_name);
+                }
+            }
            if (command.type == MutationCommand::Type::MATERIALIZE_INDEX
                || command.type == MutationCommand::Type::MATERIALIZE_STATISTIC
-                || command.type == MutationCommand::Type::MATERIALIZE_COLUMN
                || command.type == MutationCommand::Type::MATERIALIZE_PROJECTION
                || command.type == MutationCommand::Type::MATERIALIZE_TTL
                || command.type == MutationCommand::Type::DELETE
@ -93,9 +104,6 @@ static void splitAndModifyMutationCommands(
                for_interpreter.push_back(command);
                for (const auto & [column_name, expr] : command.column_to_update_expression)
                    mutated_columns.emplace(column_name);
-
-                if (command.type == MutationCommand::Type::MATERIALIZE_COLUMN)
-                    mutated_columns.emplace(command.column_name);
            }
            else if (command.type == MutationCommand::Type::DROP_INDEX
                     || command.type == MutationCommand::Type::DROP_PROJECTION
@ -205,8 +213,15 @@ static void splitAndModifyMutationCommands(
    {
        for (const auto & command : commands)
        {
-            if (command.type == MutationCommand::Type::MATERIALIZE_INDEX
-                || command.type == MutationCommand::Type::MATERIALIZE_COLUMN
+            if (command.type == MutationCommand::Type::MATERIALIZE_COLUMN)
+            {
+                /// For ordinary column with default or materialized expression, MATERIALIZE COLUMN should not override past values
+                /// So we only mutate column if `command.column_name` is a default/materialized column or if the part does not have physical column file
+                auto column_ordinary = table_columns.getOrdinary().tryGetByName(command.column_name);
+                if (!column_ordinary || !part->tryGetColumn(command.column_name) || !part->hasColumnFiles(*column_ordinary))
+                    for_interpreter.push_back(command);
+            }
+            else if (command.type == MutationCommand::Type::MATERIALIZE_INDEX
                || command.type == MutationCommand::Type::MATERIALIZE_STATISTIC
                || command.type == MutationCommand::Type::MATERIALIZE_PROJECTION
                || command.type == MutationCommand::Type::MATERIALIZE_TTL
--- a/tests/performance/sum.xml
+++ b/tests/performance/sum.xml
@ -17,6 +17,13 @@
    <query>SELECT sumKahan(toNullable(toFloat32(number))) FROM numbers(100000000)</query>
    <query>SELECT sumKahan(toNullable(toFloat64(number))) FROM numbers(100000000)</query>

+    <query>select sumIf(number::Decimal128(3), rand32() % 2 = 0) from numbers(100000000)</query>
+    <query>select sumIf(number::Decimal256(3), rand32() % 2 = 0) from numbers(100000000)</query>
+    <query>select sumIf(number::Int128, rand32() % 2 = 0) from numbers(100000000)</query>
+    <query>select sumIf(number::UInt128, rand32() % 2 = 0) from numbers(100000000)</query>
+    <query>select sumIf(number::Int256, rand32() % 2 = 0) from numbers(100000000)</query>
+    <query>select sumIf(number::UInt256, rand32() % 2 = 0) from numbers(100000000)</query>
+
    <!-- Create a table with ~20% null values. Make it random so the branch predictor doesn't do all the work -->
    <create_query>CREATE TABLE nullfloat32 (x Nullable(Float32)) ENGINE = Memory</create_query>
    <fill_query>INSERT INTO nullfloat32
--- a/tests/queries/0_stateless/02008_materialize_column.sql
+++ b/tests/queries/0_stateless/02008_materialize_column.sql
@ -17,6 +17,7 @@ ALTER TABLE tmp MATERIALIZE COLUMN s;
 ALTER TABLE tmp MODIFY COLUMN s String DEFAULT toString(x+2);
 SELECT arraySort(groupArray(x)), groupArray(s) FROM tmp;

+ALTER TABLE tmp CLEAR COLUMN s; -- Need to clear because MATERIALIZE COLUMN won't override past values;
 ALTER TABLE tmp MATERIALIZE COLUMN s;
 ALTER TABLE tmp MODIFY COLUMN s String DEFAULT toString(x+3);
 SELECT arraySort(groupArray(x)), groupArray(s) FROM tmp;
--- a/tests/queries/0_stateless/02946_materialize_column_must_not_override_past_values.reference
+++ b/tests/queries/0_stateless/02946_materialize_column_must_not_override_past_values.reference
@ -0,0 +1,45 @@
+DEFAULT expressions
+-- Compact parts
+Before materialize
+1	1
+2	54321
+After materialize
+1	1
+2	54321
+-- Wide parts
+Before materialize
+1	1
+2	54321
+After materialize
+1	1
+2	54321
+-- Nullable column != physically absent
+Before materialize
+1	1
+2	\N
+3	54321
+After materialize
+1	1
+2	\N
+3	54321
+-- Parts with renamed column
+Before materialize
+1	1
+2	54321
+After rename
+1	1
+2	54321
+After materialize
+1	1
+2	54321
+MATERIALIZED expressions
+-- Compact parts
+Before materialize
+1	54321
+After materialize
+1	65432
+-- Wide parts
+Before materialize
+1	54321
+After materialize
+1	65432
--- a/tests/queries/0_stateless/02946_materialize_column_must_not_override_past_values.sql
+++ b/tests/queries/0_stateless/02946_materialize_column_must_not_override_past_values.sql
@ -0,0 +1,85 @@
+SET mutations_sync = 2;
+
+DROP TABLE IF EXISTS tab;
+
+-- Tests that existing parts which contain a non-default value in columns with DEFAULT expression remain unchanged by MATERIALIZE COLUMN.
+SELECT 'DEFAULT expressions';
+
+SELECT '-- Compact parts';
+
+CREATE TABLE tab (id Int64, dflt Int64 DEFAULT 54321) ENGINE MergeTree ORDER BY id;
+INSERT INTO tab (id, dflt) VALUES (1, 1);
+INSERT INTO tab (id) VALUES (2);
+SELECT 'Before materialize';
+SELECT * FROM tab ORDER BY id;
+ALTER TABLE tab MATERIALIZE COLUMN dflt;
+SELECT 'After materialize';
+SELECT * FROM tab ORDER BY id;
+DROP TABLE tab;
+
+SELECT '-- Wide parts';
+
+CREATE TABLE tab (id Int64, dflt Int64 DEFAULT 54321) ENGINE MergeTree ORDER BY id SETTINGS min_bytes_for_wide_part = 1;
+INSERT INTO tab (id, dflt) VALUES (1, 1);
+INSERT INTO tab (id) VALUES (2);
+SELECT 'Before materialize';
+SELECT * FROM tab ORDER BY id;
+ALTER TABLE tab MATERIALIZE COLUMN dflt;
+SELECT 'After materialize';
+SELECT * FROM tab ORDER BY id;
+DROP TABLE tab;
+
+SELECT '-- Nullable column != physically absent';
+
+CREATE TABLE tab (id Int64, dflt Nullable(Int64) DEFAULT 54321) ENGINE MergeTree ORDER BY id SETTINGS min_bytes_for_wide_part = 1;
+INSERT INTO tab (id, dflt) VALUES (1, 1);
+INSERT INTO tab (id, dflt) VALUES (2, NULL);
+INSERT INTO tab (id) VALUES (3);
+SELECT 'Before materialize';
+SELECT * FROM tab ORDER BY id;
+ALTER TABLE tab MATERIALIZE COLUMN dflt;
+SELECT 'After materialize';
+SELECT * FROM tab ORDER BY id;
+DROP TABLE tab;
+
+SELECT '-- Parts with renamed column';
+
+CREATE TABLE tab (id Int64, dflt Int64 DEFAULT 54321) ENGINE MergeTree ORDER BY id;
+INSERT INTO tab (id, dflt) VALUES (1, 1);
+INSERT INTO tab (id) VALUES (2);
+SELECT 'Before materialize';
+SELECT * FROM tab ORDER BY id;
+ALTER TABLE tab RENAME COLUMN dflt TO dflt2;
+SELECT 'After rename';
+SELECT * FROM tab ORDER BY id;
+ALTER TABLE tab MATERIALIZE COLUMN dflt2;
+SELECT 'After materialize';
+SELECT * FROM tab ORDER BY id;
+DROP TABLE tab;
+
+-- But for columns with MATERIALIZED expression, all existing parts should be rewritten in case a new expression was set in the meantime.
+SELECT 'MATERIALIZED expressions';
+
+SELECT '-- Compact parts';
+
+CREATE TABLE tab (id Int64, mtrl Int64 MATERIALIZED 54321) ENGINE MergeTree ORDER BY id;
+INSERT INTO tab (id) VALUES (1);
+SELECT 'Before materialize';
+SELECT id, mtrl FROM tab ORDER BY id;
+ALTER TABLE tab MODIFY COLUMN mtrl Int64 MATERIALIZED 65432;
+ALTER TABLE tab MATERIALIZE COLUMN mtrl;
+SELECT 'After materialize';
+SELECT id, mtrl FROM tab ORDER BY id;
+DROP TABLE tab;
+
+SELECT '-- Wide parts';
+
+CREATE TABLE tab (id Int64, mtrl Int64 MATERIALIZED 54321) ENGINE MergeTree ORDER BY id SETTINGS min_bytes_for_wide_part = 1;
+INSERT INTO tab (id) VALUES (1);
+SELECT 'Before materialize';
+SELECT id, mtrl FROM tab ORDER BY id;
+ALTER TABLE tab MODIFY COLUMN mtrl Int64 MATERIALIZED 65432;
+ALTER TABLE tab MATERIALIZE COLUMN mtrl;
+SELECT 'After materialize';
+SELECT id, mtrl FROM tab ORDER BY id;
+DROP TABLE tab;
--- a/tests/queries/0_stateless/02985_if_over_big_int_decimal.reference
+++ b/tests/queries/0_stateless/02985_if_over_big_int_decimal.reference
@ -0,0 +1,12 @@
+49500
+49500
+49500
+49500
+49500
+49500
+450000
+450000
+450000
+450000
+450000
+450000
--- a/tests/queries/0_stateless/02985_if_over_big_int_decimal.sql
+++ b/tests/queries/0_stateless/02985_if_over_big_int_decimal.sql
@ -0,0 +1,14 @@
+select sumIf(number::Int128, number % 10 == 0) from numbers(1000);
+select sumIf(number::UInt128, number % 10 == 0) from numbers(1000);
+select sumIf(number::Int256, number % 10 == 0) from numbers(1000);
+select sumIf(number::UInt256, number % 10 == 0) from numbers(1000);
+select sumIf(number::Decimal128(3), number % 10 == 0) from numbers(1000);
+select sumIf(number::Decimal256(3), number % 10 == 0) from numbers(1000);
+
+-- Same aggregates, but the condition is the raw remainder (0..9) rather than a 0/1 flag: every non-zero value must count as true.
+select sumIf(number::Int128, number % 10) from numbers(1000);
+select sumIf(number::UInt128, number % 10) from numbers(1000);
+select sumIf(number::Int256, number % 10) from numbers(1000);
+select sumIf(number::UInt256, number % 10) from numbers(1000);
+select sumIf(number::Decimal128(3), number % 10) from numbers(1000);
+select sumIf(number::Decimal256(3), number % 10) from numbers(1000);
--- a/tests/queries/0_stateless/02994_cosineDistanceNullable.reference
+++ b/tests/queries/0_stateless/02994_cosineDistanceNullable.reference
@ -0,0 +1,11 @@
+\N
+\N
+\N
+\N
+\N
+\N
+\N
+\N
+\N
+\N
+\N
--- a/tests/queries/0_stateless/02994_cosineDistanceNullable.sql
+++ b/tests/queries/0_stateless/02994_cosineDistanceNullable.sql
@ -0,0 +1,3 @@
+-- Regression test for https://github.com/ClickHouse/ClickHouse/issues/59596: cosineDistance over a tuple containing a Nullable element, with and without a FROM clause.
+SELECT cosineDistance((1, 1), (toNullable(0.5), 0.1));
+SELECT cosineDistance((1, 1), (toNullable(0.5), 0.1)) from numbers(10);