Merge branch 'master' into segmentator-fix

This commit is contained in:
Sergei Trifonov 2023-03-24 16:19:57 +01:00 committed by GitHub
commit a05aa5c1c6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
255 changed files with 10160 additions and 2846 deletions

View File

@ -353,12 +353,14 @@ if (COMPILER_CLANG)
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,-U,_inside_main")
# The LLVM MachO linker (ld64.lld) generates by default unwind info in 'compact' format which the internal unwinder doesn't support
# and the server will not come up ('invalid compact unwind encoding'). Disable it.
# You will see warning during the build "ld64.lld: warning: Option `-no_compact_unwind' is undocumented. Should lld implement it?".
# Yes, ld64.lld does not document the option, likely for compat with Apple's system ld after which ld64.lld is modeled after and
# which also does not document it.
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,-no_compact_unwind")
# The LLVM MachO linker (ld64.lld, used in native builds) generates by default unwind info in 'compact' format which the internal
# unwinder doesn't support and the server will not come up ('invalid compact unwind encoding'). Disable it. You will see warning
# during the build "ld64.lld: warning: Option `-no_compact_unwind' is undocumented. Should lld implement it?". Yes, ld64.lld does
# not document the option, likely for compat with Apple's system ld, which ld64.lld is modeled after and which also does not
# document it.
if (NOT CMAKE_CROSSCOMPILING)
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,-no_compact_unwind")
endif ()
endif()
# Display absolute paths in error messages. Otherwise KDevelop fails to navigate to correct file and opens a new file instead.

@ -1 +1 @@
Subproject commit d80af319f5f047067b956b2fe93a6c00038c1e0d
Subproject commit 4bfaeb31dd0ef13f025221f93c138974a3e0a22a

View File

@ -31,6 +31,40 @@
#define BIG_CONSTANT(x) (x##LLU)
#endif // !defined(_MSC_VER)
//
//-----------------------------------------------------------------------------
// Block read - on little-endian machines this is a single load,
// while on big-endian or unknown machines the byte accesses should
// still get optimized into the most efficient instruction.
static inline uint32_t getblock ( const uint32_t * p )
{
#if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
return *p;
#else
const uint8_t *c = (const uint8_t *)p;
return (uint32_t)c[0] |
(uint32_t)c[1] << 8 |
(uint32_t)c[2] << 16 |
(uint32_t)c[3] << 24;
#endif
}
static inline uint64_t getblock ( const uint64_t * p )
{
#if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
return *p;
#else
const uint8_t *c = (const uint8_t *)p;
return (uint64_t)c[0] |
(uint64_t)c[1] << 8 |
(uint64_t)c[2] << 16 |
(uint64_t)c[3] << 24 |
(uint64_t)c[4] << 32 |
(uint64_t)c[5] << 40 |
(uint64_t)c[6] << 48 |
(uint64_t)c[7] << 56;
#endif
}
//-----------------------------------------------------------------------------
@ -52,7 +86,7 @@ uint32_t MurmurHash2 ( const void * key, size_t len, uint32_t seed )
while(len >= 4)
{
uint32_t k = *(uint32_t*)data;
uint32_t k = getblock((const uint32_t *)data);
k *= m;
k ^= k >> r;
@ -105,7 +139,7 @@ uint64_t MurmurHash64A ( const void * key, size_t len, uint64_t seed )
while(data != end)
{
uint64_t k = *data++;
uint64_t k = getblock(data++);
k *= m;
k ^= k >> r;
@ -151,12 +185,12 @@ uint64_t MurmurHash64B ( const void * key, size_t len, uint64_t seed )
while(len >= 8)
{
uint32_t k1 = *data++;
uint32_t k1 = getblock(data++);
k1 *= m; k1 ^= k1 >> r; k1 *= m;
h1 *= m; h1 ^= k1;
len -= 4;
uint32_t k2 = *data++;
uint32_t k2 = getblock(data++);
k2 *= m; k2 ^= k2 >> r; k2 *= m;
h2 *= m; h2 ^= k2;
len -= 4;
@ -164,7 +198,7 @@ uint64_t MurmurHash64B ( const void * key, size_t len, uint64_t seed )
if(len >= 4)
{
uint32_t k1 = *data++;
uint32_t k1 = getblock(data++);
k1 *= m; k1 ^= k1 >> r; k1 *= m;
h1 *= m; h1 ^= k1;
len -= 4;
@ -215,7 +249,7 @@ uint32_t MurmurHash2A ( const void * key, size_t len, uint32_t seed )
while(len >= 4)
{
uint32_t k = *(uint32_t*)data;
uint32_t k = getblock((const uint32_t *)data);
mmix(h,k);
@ -278,7 +312,7 @@ public:
while(len >= 4)
{
uint32_t k = *(uint32_t*)data;
uint32_t k = getblock((const uint32_t *)data);
mmix(m_hash,k);
@ -427,7 +461,7 @@ uint32_t MurmurHashAligned2 ( const void * key, size_t len, uint32_t seed )
while(len >= 4)
{
d = *(uint32_t *)data;
d = getblock((const uint32_t *)data);
t = (t >> sr) | (d << sl);
uint32_t k = t;
@ -492,7 +526,7 @@ uint32_t MurmurHashAligned2 ( const void * key, size_t len, uint32_t seed )
{
while(len >= 4)
{
uint32_t k = *(uint32_t *)data;
uint32_t k = getblock((const uint32_t *)data);
MIX(h,k,m);

View File

@ -55,14 +55,32 @@ inline uint64_t rotl64 ( uint64_t x, int8_t r )
FORCE_INLINE uint32_t getblock32 ( const uint32_t * p, int i )
{
uint32_t res;
memcpy(&res, p + i, sizeof(res));
return res;
#if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
return p[i];
#else
const uint8_t *c = (const uint8_t *)&p[i];
return (uint32_t)c[0] |
(uint32_t)c[1] << 8 |
(uint32_t)c[2] << 16 |
(uint32_t)c[3] << 24;
#endif
}
FORCE_INLINE uint64_t getblock64 ( const uint64_t * p, int i )
{
#if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
return p[i];
#else
const uint8_t *c = (const uint8_t *)&p[i];
return (uint64_t)c[0] |
(uint64_t)c[1] << 8 |
(uint64_t)c[2] << 16 |
(uint64_t)c[3] << 24 |
(uint64_t)c[4] << 32 |
(uint64_t)c[5] << 40 |
(uint64_t)c[6] << 48 |
(uint64_t)c[7] << 56;
#endif
}
//-----------------------------------------------------------------------------
@ -329,9 +347,13 @@ void MurmurHash3_x64_128 ( const void * key, const size_t len,
h1 += h2;
h2 += h1;
#if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
((uint64_t*)out)[0] = h1;
((uint64_t*)out)[1] = h2;
#else
((uint64_t*)out)[0] = h2;
((uint64_t*)out)[1] = h1;
#endif
}
//-----------------------------------------------------------------------------

View File

@ -0,0 +1,530 @@
#!/bin/bash
ckhost="localhost"
ckport=("9000" "9001" "9002" "9003")
WORKING_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/.."
OUTPUT_DIR="${WORKING_DIR}/output"
LOG_DIR="${OUTPUT_DIR}/log"
RAWDATA_DIR="${WORKING_DIR}/rawdata_dir"
database_dir="${WORKING_DIR}/database_dir"
CLIENT_SCRIPTS_DIR="${WORKING_DIR}/client_scripts"
LOG_PACK_FILE="$(date +%Y-%m-%d-%H-%M-%S)"
QUERY_FILE="queries_ssb.sql"
SERVER_BIND_CMD[0]="numactl -m 0 -N 0"
SERVER_BIND_CMD[1]="numactl -m 0 -N 0"
SERVER_BIND_CMD[2]="numactl -m 1 -N 1"
SERVER_BIND_CMD[3]="numactl -m 1 -N 1"
CLIENT_BIND_CMD=""
SSB_GEN_FACTOR=20
TABLE_NAME="lineorder_flat"
TABLE_ROWS="119994608"
CODEC_CONFIG="lz4 deflate zstd"
# define instance number
inst_num=$1
if [ ! -n "$1" ]; then
echo "Please clarify instance number from 1,2,3 or 4"
exit 1
else
echo "Benchmarking with instance number:$1"
fi
if [ ! -d "$OUTPUT_DIR" ]; then
mkdir $OUTPUT_DIR
fi
if [ ! -d "$LOG_DIR" ]; then
mkdir $LOG_DIR
fi
if [ ! -d "$RAWDATA_DIR" ]; then
mkdir $RAWDATA_DIR
fi
# define different directories
dir_server=("" "_s2" "_s3" "_s4")
ckreadSql="
CREATE TABLE customer
(
C_CUSTKEY UInt32,
C_NAME String,
C_ADDRESS String,
C_CITY LowCardinality(String),
C_NATION LowCardinality(String),
C_REGION LowCardinality(String),
C_PHONE String,
C_MKTSEGMENT LowCardinality(String)
)
ENGINE = MergeTree ORDER BY (C_CUSTKEY);
CREATE TABLE lineorder
(
LO_ORDERKEY UInt32,
LO_LINENUMBER UInt8,
LO_CUSTKEY UInt32,
LO_PARTKEY UInt32,
LO_SUPPKEY UInt32,
LO_ORDERDATE Date,
LO_ORDERPRIORITY LowCardinality(String),
LO_SHIPPRIORITY UInt8,
LO_QUANTITY UInt8,
LO_EXTENDEDPRICE UInt32,
LO_ORDTOTALPRICE UInt32,
LO_DISCOUNT UInt8,
LO_REVENUE UInt32,
LO_SUPPLYCOST UInt32,
LO_TAX UInt8,
LO_COMMITDATE Date,
LO_SHIPMODE LowCardinality(String)
)
ENGINE = MergeTree PARTITION BY toYear(LO_ORDERDATE) ORDER BY (LO_ORDERDATE, LO_ORDERKEY);
CREATE TABLE part
(
P_PARTKEY UInt32,
P_NAME String,
P_MFGR LowCardinality(String),
P_CATEGORY LowCardinality(String),
P_BRAND LowCardinality(String),
P_COLOR LowCardinality(String),
P_TYPE LowCardinality(String),
P_SIZE UInt8,
P_CONTAINER LowCardinality(String)
)
ENGINE = MergeTree ORDER BY P_PARTKEY;
CREATE TABLE supplier
(
S_SUPPKEY UInt32,
S_NAME String,
S_ADDRESS String,
S_CITY LowCardinality(String),
S_NATION LowCardinality(String),
S_REGION LowCardinality(String),
S_PHONE String
)
ENGINE = MergeTree ORDER BY S_SUPPKEY;
"
supplier_table="
CREATE TABLE supplier
(
S_SUPPKEY UInt32,
S_NAME String,
S_ADDRESS String,
S_CITY LowCardinality(String),
S_NATION LowCardinality(String),
S_REGION LowCardinality(String),
S_PHONE String
)
ENGINE = MergeTree ORDER BY S_SUPPKEY;
"
part_table="
CREATE TABLE part
(
P_PARTKEY UInt32,
P_NAME String,
P_MFGR LowCardinality(String),
P_CATEGORY LowCardinality(String),
P_BRAND LowCardinality(String),
P_COLOR LowCardinality(String),
P_TYPE LowCardinality(String),
P_SIZE UInt8,
P_CONTAINER LowCardinality(String)
)
ENGINE = MergeTree ORDER BY P_PARTKEY;
"
lineorder_table="
CREATE TABLE lineorder
(
LO_ORDERKEY UInt32,
LO_LINENUMBER UInt8,
LO_CUSTKEY UInt32,
LO_PARTKEY UInt32,
LO_SUPPKEY UInt32,
LO_ORDERDATE Date,
LO_ORDERPRIORITY LowCardinality(String),
LO_SHIPPRIORITY UInt8,
LO_QUANTITY UInt8,
LO_EXTENDEDPRICE UInt32,
LO_ORDTOTALPRICE UInt32,
LO_DISCOUNT UInt8,
LO_REVENUE UInt32,
LO_SUPPLYCOST UInt32,
LO_TAX UInt8,
LO_COMMITDATE Date,
LO_SHIPMODE LowCardinality(String)
)
ENGINE = MergeTree PARTITION BY toYear(LO_ORDERDATE) ORDER BY (LO_ORDERDATE, LO_ORDERKEY);
"
customer_table="
CREATE TABLE customer
(
C_CUSTKEY UInt32,
C_NAME String,
C_ADDRESS String,
C_CITY LowCardinality(String),
C_NATION LowCardinality(String),
C_REGION LowCardinality(String),
C_PHONE String,
C_MKTSEGMENT LowCardinality(String)
)
ENGINE = MergeTree ORDER BY (C_CUSTKEY);
"
lineorder_flat_table="
SET max_memory_usage = 20000000000;
CREATE TABLE lineorder_flat
ENGINE = MergeTree
PARTITION BY toYear(LO_ORDERDATE)
ORDER BY (LO_ORDERDATE, LO_ORDERKEY) AS
SELECT
l.LO_ORDERKEY AS LO_ORDERKEY,
l.LO_LINENUMBER AS LO_LINENUMBER,
l.LO_CUSTKEY AS LO_CUSTKEY,
l.LO_PARTKEY AS LO_PARTKEY,
l.LO_SUPPKEY AS LO_SUPPKEY,
l.LO_ORDERDATE AS LO_ORDERDATE,
l.LO_ORDERPRIORITY AS LO_ORDERPRIORITY,
l.LO_SHIPPRIORITY AS LO_SHIPPRIORITY,
l.LO_QUANTITY AS LO_QUANTITY,
l.LO_EXTENDEDPRICE AS LO_EXTENDEDPRICE,
l.LO_ORDTOTALPRICE AS LO_ORDTOTALPRICE,
l.LO_DISCOUNT AS LO_DISCOUNT,
l.LO_REVENUE AS LO_REVENUE,
l.LO_SUPPLYCOST AS LO_SUPPLYCOST,
l.LO_TAX AS LO_TAX,
l.LO_COMMITDATE AS LO_COMMITDATE,
l.LO_SHIPMODE AS LO_SHIPMODE,
c.C_NAME AS C_NAME,
c.C_ADDRESS AS C_ADDRESS,
c.C_CITY AS C_CITY,
c.C_NATION AS C_NATION,
c.C_REGION AS C_REGION,
c.C_PHONE AS C_PHONE,
c.C_MKTSEGMENT AS C_MKTSEGMENT,
s.S_NAME AS S_NAME,
s.S_ADDRESS AS S_ADDRESS,
s.S_CITY AS S_CITY,
s.S_NATION AS S_NATION,
s.S_REGION AS S_REGION,
s.S_PHONE AS S_PHONE,
p.P_NAME AS P_NAME,
p.P_MFGR AS P_MFGR,
p.P_CATEGORY AS P_CATEGORY,
p.P_BRAND AS P_BRAND,
p.P_COLOR AS P_COLOR,
p.P_TYPE AS P_TYPE,
p.P_SIZE AS P_SIZE,
p.P_CONTAINER AS P_CONTAINER
FROM lineorder AS l
INNER JOIN customer AS c ON c.C_CUSTKEY = l.LO_CUSTKEY
INNER JOIN supplier AS s ON s.S_SUPPKEY = l.LO_SUPPKEY
INNER JOIN part AS p ON p.P_PARTKEY = l.LO_PARTKEY;
show settings ilike 'max_memory_usage';
"
function insert_data(){
echo "insert_data:$1"
create_table_prefix="clickhouse client --host ${ckhost} --port $2 --multiquery -q"
insert_data_prefix="clickhouse client --query "
case $1 in
all)
clickhouse client --host ${ckhost} --port $2 --multiquery -q"$ckreadSql" && {
${insert_data_prefix} "INSERT INTO customer FORMAT CSV" < ${RAWDATA_DIR}/ssb-dbgen/customer.tbl --port=$2
${insert_data_prefix} "INSERT INTO part FORMAT CSV" < ${RAWDATA_DIR}/ssb-dbgen/part.tbl --port=$2
${insert_data_prefix} "INSERT INTO supplier FORMAT CSV" < ${RAWDATA_DIR}/ssb-dbgen/supplier.tbl --port=$2
${insert_data_prefix} "INSERT INTO lineorder FORMAT CSV" < ${RAWDATA_DIR}/ssb-dbgen/lineorder.tbl --port=$2
}
${create_table_prefix}"${lineorder_flat_table}"
;;
customer)
echo ${create_table_prefix}\"${customer_table}\"
${create_table_prefix}"${customer_table}" && {
echo "${insert_data_prefix} \"INSERT INTO $1 FORMAT CSV\" < ${RAWDATA_DIR}/ssb-dbgen/$1.tbl --port=$2"
${insert_data_prefix} "INSERT INTO $1 FORMAT CSV" < ${RAWDATA_DIR}/ssb-dbgen/$1.tbl --port=$2
}
;;
part)
echo ${create_table_prefix}\"${part_table}\"
${create_table_prefix}"${part_table}" && {
echo "${insert_data_prefix} \"INSERT INTO $1 FORMAT CSV\" < ${RAWDATA_DIR}/ssb-dbgen/$1.tbl --port=$2"
${insert_data_prefix} "INSERT INTO $1 FORMAT CSV" < ${RAWDATA_DIR}/ssb-dbgen/$1.tbl --port=$2
}
;;
supplier)
echo ${create_table_prefix}"${supplier_table}"
${create_table_prefix}"${supplier_table}" && {
echo "${insert_data_prefix} \"INSERT INTO $1 FORMAT CSV\" < ${RAWDATA_DIR}/ssb-dbgen/$1.tbl --port=$2"
${insert_data_prefix} "INSERT INTO $1 FORMAT CSV" < ${RAWDATA_DIR}/ssb-dbgen/$1.tbl --port=$2
}
;;
lineorder)
echo ${create_table_prefix}"${lineorder_table}"
${create_table_prefix}"${lineorder_table}" && {
echo "${insert_data_prefix} \"INSERT INTO $1 FORMAT CSV\" < ${RAWDATA_DIR}/ssb-dbgen/$1.tbl --port=$2"
${insert_data_prefix} "INSERT INTO $1 FORMAT CSV" < ${RAWDATA_DIR}/ssb-dbgen/$1.tbl --port=$2
}
;;
lineorder_flat)
echo ${create_table_prefix}"${lineorder_flat_table}"
${create_table_prefix}"${lineorder_flat_table}"
return 0
;;
*)
exit 0
;;
esac
}
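# check_sql <table> <port>: probe the table with 'select * ... limit 1' to detect whether it exists and is readable.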
function check_sql(){
select_sql="select * from "$1" limit 1"
clickhouse client --host ${ckhost} --port $2 --multiquery -q"${select_sql}"
}
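# check_table: for every instance, insert any missing tables and verify that lineorder_flat contains the expected number of rows.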
function check_table(){
checknum=0
source_tables="customer part supplier lineorder lineorder_flat"
test_tables=${1:-${source_tables}}
echo "Checking table data required in server..."
for i in $(seq 0 $[inst_num-1])
do
for j in `echo ${test_tables}`
do
check_sql $j ${ckport[i]} &> /dev/null || {
let checknum+=1 && insert_data "$j" ${ckport[i]}
}
done
done
for i in $(seq 0 $[inst_num-1])
do
echo "clickhouse client --host ${ckhost} --port ${ckport[i]} -m -q\"select count() from ${TABLE_NAME};\""
var=$(clickhouse client --host ${ckhost} --port ${ckport[i]} -m -q"select count() from ${TABLE_NAME};")
if [ $var -eq $TABLE_ROWS ];then
echo "Instance_${i} Table data integrity check OK -> Rows:$var"
else
echo "Instance_${i} Table data integrity check Failed -> Rows:$var"
exit 1
fi
done
if [ $checknum -gt 0 ];then
echo "Need sleep 10s after first table data insertion...$checknum"
sleep 10
fi
}
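# check_instance <port>: wait up to 10 seconds for a clickhouse server to start listening on the given port.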
function check_instance(){
instance_alive=0
for i in {1..10}
do
sleep 1
netstat -nltp | grep ${1} > /dev/null
if [ $? -ne 1 ];then
instance_alive=1
break
fi
done
if [ $instance_alive -eq 0 ];then
echo "check_instance -> clickhouse server instance faild to launch due to 10s timeout!"
exit 1
else
echo "check_instance -> clickhouse server instance launch successfully!"
fi
}
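# start_clickhouse_for_insertion <codec>: start one numactl-bound server per instance, writing server logs to LOG_DIR.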
function start_clickhouse_for_insertion(){
echo "start_clickhouse_for_insertion"
for i in $(seq 0 $[inst_num-1])
do
echo "cd ${database_dir}/$1${dir_server[i]}"
echo "${SERVER_BIND_CMD[i]} clickhouse server -C config_${1}${dir_server[i]}.xml >&${LOG_DIR}/${1}_${i}_server_log& > /dev/null"
cd ${database_dir}/$1${dir_server[i]}
${SERVER_BIND_CMD[i]} clickhouse server -C config_${1}${dir_server[i]}.xml >&${LOG_DIR}/${1}_${i}_server_log& > /dev/null
check_instance ${ckport[i]}
done
}
function start_clickhouse_for_stressing(){
echo "start_clickhouse_for_stressing"
for i in $(seq 0 $[inst_num-1])
do
echo "cd ${database_dir}/$1${dir_server[i]}"
echo "${SERVER_BIND_CMD[i]} clickhouse server -C config_${1}${dir_server[i]}.xml >&/dev/null&"
cd ${database_dir}/$1${dir_server[i]}
${SERVER_BIND_CMD[i]} clickhouse server -C config_${1}${dir_server[i]}.xml >&/dev/null&
check_instance ${ckport[i]}
done
}
yum -y install git make gcc sudo net-tools &> /dev/null
pip3 install clickhouse_driver numpy &> /dev/null
test -d ${RAWDATA_DIR}/ssb-dbgen || git clone https://github.com/vadimtk/ssb-dbgen.git ${RAWDATA_DIR}/ssb-dbgen && cd ${RAWDATA_DIR}/ssb-dbgen
if [ ! -f ${RAWDATA_DIR}/ssb-dbgen/dbgen ];then
make && {
test -f ${RAWDATA_DIR}/ssb-dbgen/customer.tbl || echo y |./dbgen -s ${SSB_GEN_FACTOR} -T c
test -f ${RAWDATA_DIR}/ssb-dbgen/part.tbl || echo y | ./dbgen -s ${SSB_GEN_FACTOR} -T p
test -f ${RAWDATA_DIR}/ssb-dbgen/supplier.tbl || echo y | ./dbgen -s ${SSB_GEN_FACTOR} -T s
test -f ${RAWDATA_DIR}/ssb-dbgen/date.tbl || echo y | ./dbgen -s ${SSB_GEN_FACTOR} -T d
test -f ${RAWDATA_DIR}/ssb-dbgen/lineorder.tbl || echo y | ./dbgen -s ${SSB_GEN_FACTOR} -T l
}
else
test -f ${RAWDATA_DIR}/ssb-dbgen/customer.tbl || echo y | ./dbgen -s ${SSB_GEN_FACTOR} -T c
test -f ${RAWDATA_DIR}/ssb-dbgen/part.tbl || echo y | ./dbgen -s ${SSB_GEN_FACTOR} -T p
test -f ${RAWDATA_DIR}/ssb-dbgen/supplier.tbl || echo y | ./dbgen -s ${SSB_GEN_FACTOR} -T s
test -f ${RAWDATA_DIR}/ssb-dbgen/date.tbl || echo y | ./dbgen -s ${SSB_GEN_FACTOR} -T d
test -f ${RAWDATA_DIR}/ssb-dbgen/lineorder.tbl || echo y | ./dbgen -s ${SSB_GEN_FACTOR} -T l
fi
filenum=`find ${RAWDATA_DIR}/ssb-dbgen/ -name "*.tbl" | wc -l`
if [ $filenum -ne 5 ];then
echo "generate ssb data file *.tbl faild"
exit 1
fi
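# kill_instance: stop all clickhouse servers and verify that none of the benchmark ports are still listening.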
function kill_instance(){
instance_alive=1
for i in {1..2}
do
pkill clickhouse && sleep 5
instance_alive=0
for i in $(seq 0 $[inst_num-1])
do
netstat -nltp | grep ${ckport[i]} > /dev/null
if [ $? -ne 1 ];then
instance_alive=1
break;
fi
done
if [ $instance_alive -eq 0 ];then
break;
fi
done
if [ $instance_alive -eq 0 ];then
echo "kill_instance OK!"
else
echo "kill_instance Failed -> clickhouse server instance still alive due to 10s timeout"
exit 1
fi
}
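# run_test <codec>: full benchmark cycle for one codec - start servers, load and verify data, restart for stressing, run client_stressing_test.py and check its log.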
function run_test(){
is_xml=0
for i in $(seq 0 $[inst_num-1])
do
if [ -f ${database_dir}/${1}${dir_server[i]}/config_${1}${dir_server[i]}.xml ]; then
is_xml=$[is_xml+1]
fi
done
if [ $is_xml -eq $inst_num ];then
echo "Benchmark with $inst_num instance"
start_clickhouse_for_insertion ${1}
for i in $(seq 0 $[inst_num-1])
do
clickhouse client --host ${ckhost} --port ${ckport[i]} -m -q"show databases;" >/dev/null
done
if [ $? -eq 0 ];then
check_table
fi
kill_instance
if [ $1 == "deflate" ];then
test -f ${LOG_DIR}/${1}_server_log && deflatemsg=`cat ${LOG_DIR}/${1}_server_log | grep DeflateJobHWPool`
if [ -n "$deflatemsg" ];then
echo ------------------------------------------------------
echo $deflatemsg
echo ------------------------------------------------------
fi
fi
echo "Check table data required in server_${1} -> Done! "
start_clickhouse_for_stressing ${1}
for i in $(seq 0 $[inst_num-1])
do
clickhouse client --host ${ckhost} --port ${ckport[i]} -m -q"show databases;" >/dev/null
done
if [ $? -eq 0 ];then
test -d ${CLIENT_SCRIPTS_DIR} && cd ${CLIENT_SCRIPTS_DIR}
echo "Client stressing... "
echo "${CLIENT_BIND_CMD} python3 client_stressing_test.py ${QUERY_FILE} $inst_num &> ${LOG_DIR}/${1}.log"
${CLIENT_BIND_CMD} python3 client_stressing_test.py ${QUERY_FILE} $inst_num &> ${LOG_DIR}/${1}.log
echo "Completed client stressing, checking log... "
finish_log=`grep "Finished" ${LOG_DIR}/${1}.log | wc -l`
if [ $finish_log -eq 1 ] ;then
kill_instance
test -f ${LOG_DIR}/${1}.log && echo "${1}.log ===> ${LOG_DIR}/${1}.log"
else
kill_instance
echo "No find 'Finished' in client log -> Performance test may fail"
exit 1
fi
else
echo "${1} clickhouse server start fail"
exit 1
fi
else
echo "clickhouse server start fail -> Please check xml files required in ${database_dir} for each instance"
exit 1
fi
}
function clear_log(){
if [ -d "$LOG_DIR" ]; then
cd ${LOG_DIR} && rm -rf *
fi
}
function gather_log_for_codec(){
cd ${OUTPUT_DIR} && mkdir -p ${LOG_PACK_FILE}/${1}
cp -rf ${LOG_DIR} ${OUTPUT_DIR}/${LOG_PACK_FILE}/${1}
}
function pack_log(){
if [ -e "${OUTPUT_DIR}/run.log" ]; then
cp ${OUTPUT_DIR}/run.log ${OUTPUT_DIR}/${LOG_PACK_FILE}/
fi
echo "Please check all log information in ${OUTPUT_DIR}/${LOG_PACK_FILE}"
}
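# setup_check: verify that IAA devices are enabled via accel-config and report library, server and kernel (idxd) information.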
function setup_check(){
iax_dev_num=`accel-config list | grep iax | wc -l`
if [ $iax_dev_num -eq 0 ] ;then
iax_dev_num=`accel-config list | grep iax | wc -l`
if [ $iax_dev_num -eq 0 ] ;then
echo "No IAA devices available -> Please check IAA hardware setup manually!"
exit 1
else
echo "IAA enabled devices number:$iax_dev_num"
fi
else
echo "IAA enabled devices number:$iax_dev_num"
fi
libaccel_version=`accel-config -v`
clickhouser_version=`clickhouse server --version`
kernel_dxd_log=`dmesg | grep dxd`
echo "libaccel_version:$libaccel_version"
echo "clickhouser_version:$clickhouser_version"
echo -e "idxd section in kernel log:\n$kernel_dxd_log"
}
setup_check
export CLICKHOUSE_WATCHDOG_ENABLE=0
for i in ${CODEC_CONFIG[@]}
do
clear_log
codec=${i}
echo "run test------------$codec"
run_test $codec
gather_log_for_codec $codec
done
pack_log
echo "Done."

View File

@ -0,0 +1,278 @@
from operator import eq
import os
import random
import time
import sys
from clickhouse_driver import Client
import numpy as np
import subprocess
import multiprocessing
from multiprocessing import Manager
warmup_runs = 10
calculated_runs = 10
seconds = 30
max_instances_number = 8
retest_number = 3
retest_tolerance = 10
def checkInt(str):
try:
int(str)
return True
except ValueError:
return False
def setup_client(index):
if index < 4:
port_idx = index
else:
port_idx = index + 4
client = Client(
host="localhost",
database="default",
user="default",
password="",
port="900%d" % port_idx,
)
union_mode_query = "SET union_default_mode='DISTINCT'"
client.execute(union_mode_query)
return client
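# Run the query `loop` times on every client to warm up caches before measuring.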
def warm_client(clientN, clientL, query, loop):
for c_idx in range(clientN):
for _ in range(loop):
clientL[c_idx].execute(query)
def read_queries(queries_list):
queries = list()
queries_id = list()
with open(queries_list, "r") as f:
for line in f:
line = line.rstrip()
line = line.split("$")
queries_id.append(line[0])
queries.append(line[1])
return queries_id, queries
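# Execute the query `loop` times on one client, recording per-query latency; prints the client's P95 latency and QPS.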
def run_task(client, cname, query, loop, query_latency):
start_time = time.time()
for i in range(loop):
client.execute(query)
query_latency.append(client.last_query.elapsed)
end_time = time.time()
p95 = np.percentile(query_latency, 95)
print(
"CLIENT: {0} end. -> P95: %f, qps: %f".format(cname)
% (p95, loop / (end_time - start_time))
)
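# Spawn one process per client running run_task concurrently; returns the total wall time and the overall P95 latency (in ms).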
def run_multi_clients(clientN, clientList, query, loop):
client_pids = {}
start_time = time.time()
manager = multiprocessing.Manager()
query_latency_list0 = manager.list()
query_latency_list1 = manager.list()
query_latency_list2 = manager.list()
query_latency_list3 = manager.list()
query_latency_list4 = manager.list()
query_latency_list5 = manager.list()
query_latency_list6 = manager.list()
query_latency_list7 = manager.list()
for c_idx in range(clientN):
client_name = "Role_%d" % c_idx
if c_idx == 0:
client_pids[c_idx] = multiprocessing.Process(
target=run_task,
args=(clientList[c_idx], client_name, query, loop, query_latency_list0),
)
elif c_idx == 1:
client_pids[c_idx] = multiprocessing.Process(
target=run_task,
args=(clientList[c_idx], client_name, query, loop, query_latency_list1),
)
elif c_idx == 2:
client_pids[c_idx] = multiprocessing.Process(
target=run_task,
args=(clientList[c_idx], client_name, query, loop, query_latency_list2),
)
elif c_idx == 3:
client_pids[c_idx] = multiprocessing.Process(
target=run_task,
args=(clientList[c_idx], client_name, query, loop, query_latency_list3),
)
elif c_idx == 4:
client_pids[c_idx] = multiprocessing.Process(
target=run_task,
args=(clientList[c_idx], client_name, query, loop, query_latency_list4),
)
elif c_idx == 5:
client_pids[c_idx] = multiprocessing.Process(
target=run_task,
args=(clientList[c_idx], client_name, query, loop, query_latency_list5),
)
elif c_idx == 6:
client_pids[c_idx] = multiprocessing.Process(
target=run_task,
args=(clientList[c_idx], client_name, query, loop, query_latency_list6),
)
elif c_idx == 7:
client_pids[c_idx] = multiprocessing.Process(
target=run_task,
args=(clientList[c_idx], client_name, query, loop, query_latency_list7),
)
else:
print("ERROR: CLIENT number dismatch!!")
exit()
print("CLIENT: %s start" % client_name)
client_pids[c_idx].start()
for c_idx in range(clientN):
client_pids[c_idx].join()
end_time = time.time()
totalT = end_time - start_time
query_latencyTotal = list()
for item in query_latency_list0:
query_latencyTotal.append(item)
for item in query_latency_list1:
query_latencyTotal.append(item)
for item in query_latency_list2:
query_latencyTotal.append(item)
for item in query_latency_list3:
query_latencyTotal.append(item)
for item in query_latency_list4:
query_latencyTotal.append(item)
for item in query_latency_list5:
query_latencyTotal.append(item)
for item in query_latency_list6:
query_latencyTotal.append(item)
for item in query_latency_list7:
query_latencyTotal.append(item)
totalP95 = np.percentile(query_latencyTotal, 95) * 1000
return totalT, totalP95
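# Calibration helper: run the query `loop` times on one client; the measurements are discarded, only the wall time of the whole pass matters.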
def run_task_calculated(client, cname, query, loop):
query_latency = list()
start_time = time.time()
for i in range(loop):
client.execute(query)
query_latency.append(client.last_query.elapsed)
end_time = time.time()
p95 = np.percentile(query_latency, 95)
def run_multi_clients_calculated(clientN, clientList, query, loop):
client_pids = {}
start_time = time.time()
for c_idx in range(clientN):
client_name = "Role_%d" % c_idx
client_pids[c_idx] = multiprocessing.Process(
target=run_task_calculated,
args=(clientList[c_idx], client_name, query, loop),
)
client_pids[c_idx].start()
for c_idx in range(clientN):
client_pids[c_idx].join()
end_time = time.time()
totalT = end_time - start_time
return totalT
if __name__ == "__main__":
client_number = 1
queries = list()
queries_id = list()
if len(sys.argv) != 3:
print(
"usage: python3 client_stressing_test.py [queries_file_path] [client_number]"
)
sys.exit()
else:
queries_list = sys.argv[1]
client_number = int(sys.argv[2])
print(
"queries_file_path: %s, client_number: %d" % (queries_list, client_number)
)
if not os.path.isfile(queries_list) or not os.access(queries_list, os.R_OK):
print("please check the right path for queries file")
sys.exit()
if (
not checkInt(sys.argv[2])
or int(sys.argv[2]) > max_instances_number
or int(sys.argv[2]) < 1
):
print("client_number should be in [1~%d]" % max_instances_number)
sys.exit()
client_list = {}
queries_id, queries = read_queries(queries_list)
for c_idx in range(client_number):
client_list[c_idx] = setup_client(c_idx)
# clear cache
os.system("sync; echo 3 > /proc/sys/vm/drop_caches")
print("###Polit Run Begin")
for i in queries:
warm_client(client_number, client_list, i, 1)
print("###Polit Run End -> Start stressing....")
query_index = 0
for q in queries:
print(
"\n###START -> Index: %d, ID: %s, Query: %s"
% (query_index, queries_id[query_index], q)
)
warm_client(client_number, client_list, q, warmup_runs)
print("###Warm Done!")
for j in range(0, retest_number):
totalT = run_multi_clients_calculated(
client_number, client_list, q, calculated_runs
)
curr_loop = int(seconds * calculated_runs / totalT) + 1
print(
"###Calculation Done! -> loopN: %d, expected seconds:%d"
% (curr_loop, seconds)
)
print("###Stress Running! -> %d iterations......" % curr_loop)
totalT, totalP95 = run_multi_clients(
client_number, client_list, q, curr_loop
)
if totalT > (seconds - retest_tolerance) and totalT < (
seconds + retest_tolerance
):
break
else:
print(
"###totalT:%d is far way from expected seconds:%d. Run again ->j:%d!"
% (totalT, seconds, j)
)
print(
"###Completed! -> ID: %s, clientN: %d, totalT: %.2f s, latencyAVG: %.2f ms, P95: %.2f ms, QPS_Final: %.2f"
% (
queries_id[query_index],
client_number,
totalT,
totalT * 1000 / (curr_loop * client_number),
totalP95,
((curr_loop * client_number) / totalT),
)
)
query_index += 1
print("###Finished!")

View File

@ -0,0 +1,10 @@
Q1.1$SELECT sum(LO_EXTENDEDPRICE * LO_DISCOUNT) AS revenue FROM lineorder_flat WHERE toYear(LO_ORDERDATE) = 1993 AND LO_DISCOUNT BETWEEN 1 AND 3 AND LO_QUANTITY < 25;
Q2.1$SELECT sum(LO_REVENUE),toYear(LO_ORDERDATE) AS year,P_BRAND FROM lineorder_flat WHERE P_CATEGORY = 'MFGR#12' AND S_REGION = 'AMERICA' GROUP BY year,P_BRAND ORDER BY year,P_BRAND;
Q2.2$SELECT sum(LO_REVENUE),toYear(LO_ORDERDATE) AS year,P_BRAND FROM lineorder_flat WHERE P_BRAND >= 'MFGR#2221' AND P_BRAND <= 'MFGR#2228' AND S_REGION = 'ASIA' GROUP BY year,P_BRAND ORDER BY year,P_BRAND;
Q2.3$SELECT sum(LO_REVENUE),toYear(LO_ORDERDATE) AS year,P_BRAND FROM lineorder_flat WHERE P_BRAND = 'MFGR#2239' AND S_REGION = 'EUROPE' GROUP BY year,P_BRAND ORDER BY year,P_BRAND;
Q3.1$SELECT C_NATION,S_NATION,toYear(LO_ORDERDATE) AS year,sum(LO_REVENUE) AS revenue FROM lineorder_flat WHERE C_REGION = 'ASIA' AND S_REGION = 'ASIA' AND year >= 1992 AND year <= 1997 GROUP BY C_NATION,S_NATION,year ORDER BY year ASC,revenue DESC;
Q3.2$SELECT C_CITY,S_CITY,toYear(LO_ORDERDATE) AS year,sum(LO_REVENUE) AS revenue FROM lineorder_flat WHERE C_NATION = 'UNITED STATES' AND S_NATION = 'UNITED STATES' AND year >= 1992 AND year <= 1997 GROUP BY C_CITY,S_CITY,year ORDER BY year ASC,revenue DESC;
Q3.3$SELECT C_CITY,S_CITY,toYear(LO_ORDERDATE) AS year,sum(LO_REVENUE) AS revenue FROM lineorder_flat WHERE (C_CITY = 'UNITED KI1' OR C_CITY = 'UNITED KI5') AND (S_CITY = 'UNITED KI1' OR S_CITY = 'UNITED KI5') AND year >= 1992 AND year <= 1997 GROUP BY C_CITY,S_CITY,year ORDER BY year ASC,revenue DESC;
Q4.1$SELECT toYear(LO_ORDERDATE) AS year,C_NATION,sum(LO_REVENUE - LO_SUPPLYCOST) AS profit FROM lineorder_flat WHERE C_REGION = 'AMERICA' AND S_REGION = 'AMERICA' AND (P_MFGR = 'MFGR#1' OR P_MFGR = 'MFGR#2') GROUP BY year,C_NATION ORDER BY year ASC,C_NATION ASC;
Q4.2$SELECT toYear(LO_ORDERDATE) AS year,S_NATION,P_CATEGORY,sum(LO_REVENUE - LO_SUPPLYCOST) AS profit FROM lineorder_flat WHERE C_REGION = 'AMERICA' AND S_REGION = 'AMERICA' AND (year = 1997 OR year = 1998) AND (P_MFGR = 'MFGR#1' OR P_MFGR = 'MFGR#2') GROUP BY year,S_NATION,P_CATEGORY ORDER BY year ASC,S_NATION ASC,P_CATEGORY ASC;
Q4.3$SELECT toYear(LO_ORDERDATE) AS year,S_CITY,P_BRAND,sum(LO_REVENUE - LO_SUPPLYCOST) AS profit FROM lineorder_flat WHERE S_NATION = 'UNITED STATES' AND (year = 1997 OR year = 1998) AND P_CATEGORY = 'MFGR#14' GROUP BY year,S_CITY,P_BRAND ORDER BY year ASC,S_CITY ASC,P_BRAND ASC;

View File

@ -0,0 +1,6 @@
WORKING_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/.."
if [ ! -d "${WORKING_DIR}/output" ]; then
mkdir ${WORKING_DIR}/output
fi
bash allin1_ssb.sh 2 > ${WORKING_DIR}/output/run.log
echo "Please check log in: ${WORKING_DIR}/output/run.log"

View File

@ -0,0 +1,49 @@
<!-- This file was generated automatically.
Do not edit it: it is likely to be discarded and generated again before it's read next time.
Files used to generate this file:
config.xml -->
<!-- Config that is used when server is run without config file. --><clickhouse>
<logger>
<level>trace</level>
<console>true</console>
</logger>
<http_port>8123</http_port>
<tcp_port>9000</tcp_port>
<mysql_port>9004</mysql_port>
<path>./</path>
<uncompressed_cache_size>8589934592</uncompressed_cache_size>
<mark_cache_size>5368709120</mark_cache_size>
<mlock_executable>true</mlock_executable>
<compression>
<case>
<method>deflate_qpl</method>
</case>
</compression>
<users>
<default>
<password/>
<networks>
<ip>::/0</ip>
</networks>
<profile>default</profile>
<quota>default</quota>
<access_management>1</access_management>
</default>
</users>
<profiles>
<default/>
</profiles>
<quotas>
<default/>
</quotas>
</clickhouse>

View File

@ -0,0 +1,49 @@
<!-- This file was generated automatically.
Do not edit it: it is likely to be discarded and generated again before it's read next time.
Files used to generate this file:
config.xml -->
<!-- Config that is used when server is run without config file. --><clickhouse>
<logger>
<level>trace</level>
<console>true</console>
</logger>
<http_port>8124</http_port>
<tcp_port>9001</tcp_port>
<mysql_port>9005</mysql_port>
<path>./</path>
<uncompressed_cache_size>8589934592</uncompressed_cache_size>
<mark_cache_size>5368709120</mark_cache_size>
<mlock_executable>true</mlock_executable>
<compression>
<case>
<method>deflate_qpl</method>
</case>
</compression>
<users>
<default>
<password/>
<networks>
<ip>::/0</ip>
</networks>
<profile>default</profile>
<quota>default</quota>
<access_management>1</access_management>
</default>
</users>
<profiles>
<default/>
</profiles>
<quotas>
<default/>
</quotas>
</clickhouse>

View File

@ -0,0 +1,49 @@
<!-- This file was generated automatically.
Do not edit it: it is likely to be discarded and generated again before it's read next time.
Files used to generate this file:
config.xml -->
<!-- Config that is used when server is run without config file. --><clickhouse>
<logger>
<level>trace</level>
<console>true</console>
</logger>
<http_port>8123</http_port>
<tcp_port>9000</tcp_port>
<mysql_port>9004</mysql_port>
<path>./</path>
<uncompressed_cache_size>8589934592</uncompressed_cache_size>
<mark_cache_size>5368709120</mark_cache_size>
<mlock_executable>true</mlock_executable>
<compression>
<case>
<method>lz4</method>
</case>
</compression>
<users>
<default>
<password/>
<networks>
<ip>::/0</ip>
</networks>
<profile>default</profile>
<quota>default</quota>
<access_management>1</access_management>
</default>
</users>
<profiles>
<default/>
</profiles>
<quotas>
<default/>
</quotas>
</clickhouse>

View File

@ -0,0 +1,49 @@
<!-- This file was generated automatically.
Do not edit it: it is likely to be discarded and generated again before it's read next time.
Files used to generate this file:
config.xml -->
<!-- Config that is used when server is run without config file. --><clickhouse>
<logger>
<level>trace</level>
<console>true</console>
</logger>
<http_port>8124</http_port>
<tcp_port>9001</tcp_port>
<mysql_port>9005</mysql_port>
<path>./</path>
<uncompressed_cache_size>8589934592</uncompressed_cache_size>
<mark_cache_size>5368709120</mark_cache_size>
<mlock_executable>true</mlock_executable>
<compression>
<case>
<method>lz4</method>
</case>
</compression>
<users>
<default>
<password/>
<networks>
<ip>::/0</ip>
</networks>
<profile>default</profile>
<quota>default</quota>
<access_management>1</access_management>
</default>
</users>
<profiles>
<default/>
</profiles>
<quotas>
<default/>
</quotas>
</clickhouse>

View File

@ -0,0 +1,49 @@
<!-- This file was generated automatically.
Do not edit it: it is likely to be discarded and generated again before it's read next time.
Files used to generate this file:
config.xml -->
<!-- Config that is used when server is run without config file. --><clickhouse>
<logger>
<level>trace</level>
<console>true</console>
</logger>
<http_port>8123</http_port>
<tcp_port>9000</tcp_port>
<mysql_port>9004</mysql_port>
<path>./</path>
<uncompressed_cache_size>8589934592</uncompressed_cache_size>
<mark_cache_size>5368709120</mark_cache_size>
<mlock_executable>true</mlock_executable>
<compression>
<case>
<method>zstd</method>
</case>
</compression>
<users>
<default>
<password/>
<networks>
<ip>::/0</ip>
</networks>
<profile>default</profile>
<quota>default</quota>
<access_management>1</access_management>
</default>
</users>
<profiles>
<default/>
</profiles>
<quotas>
<default/>
</quotas>
</clickhouse>

View File

@ -0,0 +1,49 @@
<!-- This file was generated automatically.
Do not edit it: it is likely to be discarded and generated again before it's read next time.
Files used to generate this file:
config.xml -->
<!-- Config that is used when server is run without config file. --><clickhouse>
<logger>
<level>trace</level>
<console>true</console>
</logger>
<http_port>8124</http_port>
<tcp_port>9001</tcp_port>
<mysql_port>9005</mysql_port>
<path>./</path>
<uncompressed_cache_size>8589934592</uncompressed_cache_size>
<mark_cache_size>5368709120</mark_cache_size>
<mlock_executable>true</mlock_executable>
<compression>
<case>
<method>zstd</method>
</case>
</compression>
<users>
<default>
<password/>
<networks>
<ip>::/0</ip>
</networks>
<profile>default</profile>
<quota>default</quota>
<access_management>1</access_management>
</default>
</users>
<profiles>
<default/>
</profiles>
<quotas>
<default/>
</quotas>
</clickhouse>

2
contrib/vectorscan vendored

@ -1 +1 @@
Subproject commit f6250ae3e5a3085000239313ad0689cc1e00cdc2
Subproject commit b4bba94b1a250603b0b198e0394946e32f6c3f30

View File

@ -128,7 +128,7 @@ function run_tests()
set +e
if [[ -n "$USE_PARALLEL_REPLICAS" ]] && [[ "$USE_PARALLEL_REPLICAS" -eq 1 ]]; then
clickhouse-test --client="clickhouse-client --use_hedged_requests=0 --allow_experimental_parallel_reading_from_replicas=1 \
clickhouse-test --client="clickhouse-client --use_hedged_requests=0 --allow_experimental_parallel_reading_from_replicas=1 --parallel_replicas_for_non_replicated_merge_tree=1 \
--max_parallel_replicas=100 --cluster_for_parallel_replicas='parallel_replicas'" \
-j 2 --testname --shard --zookeeper --check-zookeeper-session --no-stateless --no-parallel-replicas --hung-check --print-time "${ADDITIONAL_OPTIONS[@]}" \
"$SKIP_TESTS_OPTION" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee test_output/test_result.txt

View File

@ -10,31 +10,38 @@ import requests
import tempfile
DEFAULT_URL = 'https://clickhouse-datasets.s3.amazonaws.com'
DEFAULT_URL = "https://clickhouse-datasets.s3.amazonaws.com"
AVAILABLE_DATASETS = {
'hits': 'hits_v1.tar',
'visits': 'visits_v1.tar',
"hits": "hits_v1.tar",
"visits": "visits_v1.tar",
}
RETRIES_COUNT = 5
def _get_temp_file_name():
return os.path.join(tempfile._get_default_tempdir(), next(tempfile._get_candidate_names()))
return os.path.join(
tempfile._get_default_tempdir(), next(tempfile._get_candidate_names())
)
def build_url(base_url, dataset):
return os.path.join(base_url, dataset, 'partitions', AVAILABLE_DATASETS[dataset])
return os.path.join(base_url, dataset, "partitions", AVAILABLE_DATASETS[dataset])
def dowload_with_progress(url, path):
logging.info("Downloading from %s to temp path %s", url, path)
for i in range(RETRIES_COUNT):
try:
with open(path, 'wb') as f:
with open(path, "wb") as f:
response = requests.get(url, stream=True)
response.raise_for_status()
total_length = response.headers.get('content-length')
total_length = response.headers.get("content-length")
if total_length is None or int(total_length) == 0:
logging.info("No content-length, will download file without progress")
logging.info(
"No content-length, will download file without progress"
)
f.write(response.content)
else:
dl = 0
@ -46,7 +53,11 @@ def dowload_with_progress(url, path):
if sys.stdout.isatty():
done = int(50 * dl / total_length)
percent = int(100 * float(dl) / total_length)
sys.stdout.write("\r[{}{}] {}%".format('=' * done, ' ' * (50-done), percent))
sys.stdout.write(
"\r[{}{}] {}%".format(
"=" * done, " " * (50 - done), percent
)
)
sys.stdout.flush()
break
except Exception as ex:
@ -56,14 +67,21 @@ def dowload_with_progress(url, path):
if os.path.exists(path):
os.remove(path)
else:
raise Exception("Cannot download dataset from {}, all retries exceeded".format(url))
raise Exception(
"Cannot download dataset from {}, all retries exceeded".format(url)
)
sys.stdout.write("\n")
logging.info("Downloading finished")
def unpack_to_clickhouse_directory(tar_path, clickhouse_path):
logging.info("Will unpack data from temp path %s to clickhouse db %s", tar_path, clickhouse_path)
with tarfile.open(tar_path, 'r') as comp_file:
logging.info(
"Will unpack data from temp path %s to clickhouse db %s",
tar_path,
clickhouse_path,
)
with tarfile.open(tar_path, "r") as comp_file:
comp_file.extractall(path=clickhouse_path)
logging.info("Unpack finished")
@ -72,15 +90,21 @@ if __name__ == "__main__":
logging.basicConfig(level=logging.INFO)
parser = argparse.ArgumentParser(
description="Simple tool for dowloading datasets for clickhouse from S3")
description="Simple tool for dowloading datasets for clickhouse from S3"
)
parser.add_argument('--dataset-names', required=True, nargs='+', choices=list(AVAILABLE_DATASETS.keys()))
parser.add_argument('--url-prefix', default=DEFAULT_URL)
parser.add_argument('--clickhouse-data-path', default='/var/lib/clickhouse/')
parser.add_argument(
"--dataset-names",
required=True,
nargs="+",
choices=list(AVAILABLE_DATASETS.keys()),
)
parser.add_argument("--url-prefix", default=DEFAULT_URL)
parser.add_argument("--clickhouse-data-path", default="/var/lib/clickhouse/")
args = parser.parse_args()
datasets = args.dataset_names
logging.info("Will fetch following datasets: %s", ', '.join(datasets))
logging.info("Will fetch following datasets: %s", ", ".join(datasets))
for dataset in datasets:
logging.info("Processing %s", dataset)
temp_archive_path = _get_temp_file_name()
@ -92,10 +116,11 @@ if __name__ == "__main__":
logging.info("Some exception occured %s", str(ex))
raise
finally:
logging.info("Will remove downloaded file %s from filesystem if it exists", temp_archive_path)
logging.info(
"Will remove downloaded file %s from filesystem if it exists",
temp_archive_path,
)
if os.path.exists(temp_archive_path):
os.remove(temp_archive_path)
logging.info("Processing of %s finished", dataset)
logging.info("Fetch finished, enjoy your tables!")

View File

@ -170,6 +170,7 @@ if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]
fi
rg -Fa "<Fatal>" /var/log/clickhouse-server/clickhouse-server.log ||:
rg -A50 -Fa "============" /var/log/clickhouse-server/stderr.log ||:
zstd --threads=0 < /var/log/clickhouse-server/clickhouse-server.log > /test_output/clickhouse-server.log.zst &
# Compress tables.

View File

@ -11,13 +11,14 @@ RUN apt-get update && env DEBIAN_FRONTEND=noninteractive apt-get install --yes \
aspell \
curl \
git \
file \
libxml2-utils \
moreutils \
python3-fuzzywuzzy \
python3-pip \
shellcheck \
yamllint \
&& pip3 install black==22.8.0 boto3 codespell==2.2.1 dohq-artifactory mypy PyGithub unidiff pylint==2.6.2 \
&& pip3 install black==23.1.0 boto3 codespell==2.2.1 dohq-artifactory mypy PyGithub unidiff pylint==2.6.2 \
&& apt-get clean \
&& rm -rf /root/.cache/pip

View File

@ -0,0 +1,283 @@
---
slug: /en/development/building_and_benchmarking_deflate_qpl
sidebar_position: 73
sidebar_label: Building and Benchmarking DEFLATE_QPL
description: How to build ClickHouse and run benchmarks with the DEFLATE_QPL codec
---
# Build ClickHouse with DEFLATE_QPL
- Make sure your target machine meets the QPL required [Prerequisites](https://intel.github.io/qpl/documentation/get_started_docs/installation.html#prerequisites)
- Pass the following flag to CMake when building ClickHouse, depending on the capabilities of your target machine:
``` bash
cmake -DENABLE_AVX2=1 -DENABLE_QPL=1 ..
```
or
``` bash
cmake -DENABLE_AVX512=1 -DENABLE_QPL=1 ..
```
- For generic requirements, please refer to the general ClickHouse [build instructions](/docs/en/development/build.md)
# Run Benchmark with DEFLATE_QPL
## Files list
The folder `benchmark_sample` under [qpl-cmake](https://github.com/ClickHouse/ClickHouse/tree/master/contrib/qpl-cmake) gives an example of how to run the benchmark with Python scripts:
`client_scripts` contains the Python scripts for running a typical benchmark, for example:
- `client_stressing_test.py`: The Python script for the query stress test with [1~4] server instances.
- `queries_ssb.sql`: The file listing all queries for the [Star Schema Benchmark](https://clickhouse.com/docs/en/getting-started/example-datasets/star-schema/)
- `allin1_ssb.sh`: This shell script runs the whole benchmark workflow automatically.
`database_files` holds the database files, one directory per codec (lz4/deflate/zstd).
## Run benchmark automatically for Star Schema:
``` bash
$ cd ./benchmark_sample/client_scripts
$ sh run_ssb.sh
```
After completion, please check all the results in the folder `./output/`.
If you run into failures, please run the benchmark manually as described in the sections below.
## Definition
[CLICKHOUSE_EXE] means the path of the ClickHouse executable program.
## Environment
- CPU: Sapphire Rapids
- OS requirements: refer to [System Requirements for QPL](https://intel.github.io/qpl/documentation/get_started_docs/installation.html#system-requirements)
- IAA setup: refer to [Accelerator Configuration](https://intel.github.io/qpl/documentation/get_started_docs/installation.html#accelerator-configuration)
- Install the required Python modules:
``` bash
pip3 install clickhouse_driver numpy
```
[Self-check for IAA]
``` bash
$ accel-config list | grep -P 'iax|state'
```
The expected output looks like this:
``` bash
"dev":"iax1",
"state":"enabled",
"state":"enabled",
```
If nothing is printed, IAA is not ready to work. Please check the IAA setup again.
## Generate raw data
``` bash
$ cd ./benchmark_sample
$ mkdir rawdata_dir && cd rawdata_dir
```
Use [`dbgen`](https://clickhouse.com/docs/en/getting-started/example-datasets/star-schema) to generate 100 million rows of data with the parameter:
-s 20
The `*.tbl` files are expected to appear under `./benchmark_sample/rawdata_dir/ssb-dbgen`, as sketched below.
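A minimal sketch of this step (assuming you are still inside `rawdata_dir`), mirroring what `allin1_ssb.sh` does; the repository and the `-T` flags are taken from that script:
``` bash
$ git clone https://github.com/vadimtk/ssb-dbgen.git && cd ssb-dbgen
$ make
$ for t in c p s d l; do echo y | ./dbgen -s 20 -T $t; done   # customer, part, supplier, date, lineorder
$ ls *.tbl   # five .tbl files are expected here
```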
## Database setup
Set up the database with the LZ4 codec:
``` bash
$ cd ./database_dir/lz4
$ [CLICKHOUSE_EXE] server -C config_lz4.xml >&/dev/null&
$ [CLICKHOUSE_EXE] client
```
Here you should see the message `Connected to ClickHouse server` in the console, which means the client has successfully set up a connection with the server.
Complete the three steps below, as described in [Star Schema Benchmark](https://clickhouse.com/docs/en/getting-started/example-datasets/star-schema):
- Creating tables in ClickHouse
- Inserting data. Use `./benchmark_sample/rawdata_dir/ssb-dbgen/*.tbl` as the input data (see the sketch after this list).
- Converting the “star schema” to a de-normalized “flat schema”
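A minimal sketch of the insertion step, mirroring the commands in `allin1_ssb.sh` (the default port 9000 of the first instance is assumed):
``` bash
$ cd ./benchmark_sample/rawdata_dir/ssb-dbgen
$ [CLICKHOUSE_EXE] client --query "INSERT INTO customer FORMAT CSV"  < customer.tbl
$ [CLICKHOUSE_EXE] client --query "INSERT INTO part FORMAT CSV"      < part.tbl
$ [CLICKHOUSE_EXE] client --query "INSERT INTO supplier FORMAT CSV"  < supplier.tbl
$ [CLICKHOUSE_EXE] client --query "INSERT INTO lineorder FORMAT CSV" < lineorder.tbl
```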
Set up the database with the IAA Deflate codec:
``` bash
$ cd ./database_dir/deflate
$ [CLICKHOUSE_EXE] server -C config_deflate.xml >&/dev/null&
$ [CLICKHOUSE_EXE] client
```
Complete the same three steps as for LZ4 above.
Set up the database with the ZSTD codec:
``` bash
$ cd ./database_dir/zstd
$ [CLICKHOUSE_EXE] server -C config_zstd.xml >&/dev/null&
$ [CLICKHOUSE_EXE] client
```
Complete the same three steps as for LZ4 above.
[Self-check]
For each codec (lz4/zstd/deflate), please execute the query below to make sure the databases were created successfully:
```sql
select count() from lineorder_flat
```
You are expected to see the following output:
```sql
┌───count()─┐
│ 119994608 │
└───────────┘
```
[Self-check for IAA Deflate codec]
The first time you execute an insertion or query from the client, the ClickHouse server console is expected to print this log:
```text
Hardware-assisted DeflateQpl codec is ready!
```
If you never see this message, but instead see the log below:
```text
Initialization of hardware-assisted DeflateQpl codec failed
```
then the IAA devices are not ready and you need to check the IAA setup again.
## Benchmark with single instance
- Before starting the benchmark, please disable C6 and set the CPU frequency governor to `performance`:
``` bash
$ cpupower idle-set -d 3
$ cpupower frequency-set -g performance
```
- To eliminate the impact of being memory bound across sockets, we use `numactl` to bind the server to one socket and the client to the other socket.
- Single instance means a single server connected to a single client.
Now run the benchmark for LZ4/Deflate/ZSTD respectively:
LZ4:
``` bash
$ cd ./database_dir/lz4
$ numactl -m 0 -N 0 [CLICKHOUSE_EXE] server -C config_lz4.xml >&/dev/null&
$ cd ./client_scripts
$ numactl -m 1 -N 1 python3 client_stressing_test.py queries_ssb.sql 1 > lz4.log
```
IAA deflate:
``` bash
$ cd ./database_dir/deflate
$ numactl -m 0 -N 0 [CLICKHOUSE_EXE] server -C config_deflate.xml >&/dev/null&
$ cd ./client_scripts
$ numactl -m 1 -N 1 python3 client_stressing_test.py queries_ssb.sql 1 > deflate.log
```
ZSTD:
``` bash
$ cd ./database_dir/zstd
$ numactl -m 0 -N 0 [CLICKHOUSE_EXE] server -C config_zstd.xml >&/dev/null&
$ cd ./client_scripts
$ numactl -m 1 -N 1 python3 client_stressing_test.py queries_ssb.sql 1 > zstd.log
```
Three logs should now have been produced as expected:
```text
lz4.log
deflate.log
zstd.log
```
How to check performance metrics:
We focus on QPS: please search for the keyword `QPS_Final` and collect the statistics (one way to do this is sketched below).
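For example, one possible way (not part of the provided scripts) to pull the final QPS out of each log:
``` bash
$ grep -H "QPS_Final" lz4.log deflate.log zstd.log
```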
## Benchmark with multi-instances
- To reduce the impact of being memory bound with too many threads, we recommend running the benchmark with multiple instances.
- Multi-instance means multiple (2 or 4) servers, each connected to its own client.
- The cores of one socket need to be divided equally and assigned to the servers respectively.
- For multi-instances, you must create a new folder for each codec and insert the dataset by following steps similar to the single-instance case.
There are 2 differences:
- On the client side, you need to launch clickhouse with the assigned port during table creation and data insertion.
- On the server side, you need to launch clickhouse with the specific xml config file in which the port has been assigned. All customized xml config files for multi-instances are provided under ./server_config.
Here we assume there are 60 cores per socket and take 2 instances as an example.
Launch server for first instance
LZ4:
``` bash
$ cd ./database_dir/lz4
$ numactl -C 0-29,120-149 [CLICKHOUSE_EXE] server -C config_lz4.xml >&/dev/null&
```
ZSTD:
``` bash
$ cd ./database_dir/zstd
$ numactl -C 0-29,120-149 [CLICKHOUSE_EXE] server -C config_zstd.xml >&/dev/null&
```
IAA Deflate:
``` bash
$ cd ./database_dir/deflate
$ numactl -C 0-29,120-149 [CLICKHOUSE_EXE] server -C config_deflate.xml >&/dev/null&
```
[Launch server for second instance]
LZ4:
``` bash
$ cd ./database_dir && mkdir lz4_s2 && cd lz4_s2
$ cp ../../server_config/config_lz4_s2.xml ./
$ numactl -C 30-59,150-179 [CLICKHOUSE_EXE] server -C config_lz4_s2.xml >&/dev/null&
```
ZSTD:
``` bash
$ cd ./database_dir && mkdir zstd_s2 && cd zstd_s2
$ cp ../../server_config/config_zstd_s2.xml ./
$ numactl -C 30-59,150-179 [CLICKHOUSE_EXE] server -C config_zstd_s2.xml >&/dev/null&
```
IAA Deflate:
``` bash
$ cd ./database_dir && mkdir deflate_s2 && cd deflate_s2
$ cp ../../server_config/config_deflate_s2.xml ./
$ numactl -C 30-59,150-179 [CLICKHOUSE_EXE] server -C config_deflate_s2.xml >&/dev/null&
```
Creating tables && Inserting data for second instance
Creating tables:
``` bash
$ [CLICKHOUSE_EXE] client -m --port=9001
```
Inserting data:
``` bash
$ [CLICKHOUSE_EXE] client --query "INSERT INTO [TBL_FILE_NAME] FORMAT CSV" < [TBL_FILE_NAME].tbl --port=9001
```
- [TBL_FILE_NAME] represents the name of a file matching the pattern `*.tbl` under `./benchmark_sample/rawdata_dir/ssb-dbgen`.
- `--port=9001` stands for the assigned port of the server instance, which is also defined in config_lz4_s2.xml/config_zstd_s2.xml/config_deflate_s2.xml. For even more instances, replace it with 9002/9003, which stand for the s3/s4 instances respectively. If you don't assign it, the port is 9000 by default, which is already used by the first instance.
Benchmarking with 2 instances
LZ4:
``` bash
$ cd ./database_dir/lz4
$ numactl -C 0-29,120-149 [CLICKHOUSE_EXE] server -C config_lz4.xml >&/dev/null&
$ cd ./database_dir/lz4_s2
$ numactl -C 30-59,150-179 [CLICKHOUSE_EXE] server -C config_lz4_s2.xml >&/dev/null&
$ cd ./client_scripts
$ numactl -m 1 -N 1 python3 client_stressing_test.py queries_ssb.sql 2 > lz4_2insts.log
```
ZSTD:
``` bash
$ cd ./database_dir/zstd
$ numactl -C 0-29,120-149 [CLICKHOUSE_EXE] server -C config_zstd.xml >&/dev/null&
$ cd ./database_dir/zstd_s2
$ numactl -C 30-59,150-179 [CLICKHOUSE_EXE] server -C config_zstd_s2.xml >&/dev/null&
$ cd ./client_scripts
$ numactl -m 1 -N 1 python3 client_stressing_test.py queries_ssb.sql 2 > zstd_2insts.log
```
IAA deflate
``` bash
$ cd ./database_dir/deflate
$ numactl -C 0-29,120-149 [CLICKHOUSE_EXE] server -C config_deflate.xml >&/dev/null&
$ cd ./database_dir/deflate_s2
$ numactl -C 30-59,150-179 [CLICKHOUSE_EXE] server -C config_deflate_s2.xml >&/dev/null&
$ cd ./client_scripts
$ numactl -m 1 -N 1 python3 client_stressing_test.py queries_ssb.sql 2 > deflate_2insts.log
```
Here the last argument `2` of client_stressing_test.py stands for the number of instances. For more instances, replace it with 3 or 4. This script supports up to 4 instances.
Three logs should now have been produced as expected:
``` text
lz4_2insts.log
deflate_2insts.log
zstd_2insts.log
```
How to check performance metrics:
We focus on QPS: please search for the keyword `QPS_Final` and collect the statistics.
The benchmark setup for 4 instances is similar to the 2-instance setup above.
We recommend using the 2-instance benchmark data as the final report for review.
## Tips
Each time before launching a new clickhouse server, please make sure no background clickhouse process is running; check for and kill any old one:
``` bash
$ ps -aux| grep clickhouse
$ kill -9 [PID]
```
By comparing the query list in ./client_scripts/queries_ssb.sql with the official [Star Schema Benchmark](https://clickhouse.com/docs/en/getting-started/example-datasets/star-schema), you will find that 3 queries are not included: Q1.2/Q1.3/Q3.4. This is because CPU utilization is very low (<10%) for these queries, which means they cannot demonstrate performance differences.

View File

@ -377,8 +377,9 @@ CREATE TABLE table_name
i32 Int32,
s String,
...
INDEX a (u64 * i32, s) TYPE minmax GRANULARITY 3,
INDEX b (u64 * length(s)) TYPE set(1000) GRANULARITY 4
INDEX idx1 u64 TYPE bloom_filter GRANULARITY 3,
INDEX idx2 u64 * i32 TYPE minmax GRANULARITY 3,
INDEX idx3 u64 * length(s) TYPE set(1000) GRANULARITY 4
) ENGINE = MergeTree()
...
```
@ -386,8 +387,25 @@ CREATE TABLE table_name
Indices from the example can be used by ClickHouse to reduce the amount of data to read from disk in the following queries:
``` sql
SELECT count() FROM table WHERE s < 'z'
SELECT count() FROM table WHERE u64 * i32 == 10 AND u64 * length(s) >= 1234
SELECT count() FROM table WHERE u64 == 10;
SELECT count() FROM table WHERE u64 * i32 >= 1234
SELECT count() FROM table WHERE u64 * length(s) == 1234
```
Data skipping indexes can also be created on composite columns:
```sql
-- on columns of type Map:
INDEX map_key_index mapKeys(map_column) TYPE bloom_filter
INDEX map_value_index mapValues(map_column) TYPE bloom_filter
-- on columns of type Tuple:
INDEX tuple_1_index tuple_column.1 TYPE bloom_filter
INDEX tuple_2_index tuple_column.2 TYPE bloom_filter
-- on columns of type Nested:
INDEX nested_1_index col.nested_col1 TYPE bloom_filter
INDEX nested_2_index col.nested_col2 TYPE bloom_filter
```
### Available Types of Indices {#available-types-of-indices}
@ -432,20 +450,6 @@ Syntax: `tokenbf_v1(size_of_bloom_filter_in_bytes, number_of_hash_functions, ran
- An experimental index to support approximate nearest neighbor (ANN) search. See [here](annindexes.md) for details.
- An experimental inverted index to support full-text search. See [here](invertedindexes.md) for details.
## Example of index creation for Map data type
```
INDEX map_key_index mapKeys(map_column) TYPE bloom_filter GRANULARITY 1
INDEX map_key_index mapValues(map_column) TYPE bloom_filter GRANULARITY 1
```
``` sql
INDEX sample_index (u64 * length(s)) TYPE minmax GRANULARITY 4
INDEX sample_index2 (u64 * length(str), i32 + f64 * 100, date, str) TYPE set(100) GRANULARITY 4
INDEX sample_index3 (lower(str), str) TYPE ngrambf_v1(3, 256, 2, 0) GRANULARITY 4
```
### Functions Support {#functions-support}
Conditions in the `WHERE` clause contain calls of functions that operate on columns. If the column is a part of an index, ClickHouse tries to use this index when performing the functions. ClickHouse supports different subsets of functions for using indexes.

View File

@ -15,7 +15,7 @@ Usage examples:
## Usage in ClickHouse Server {#usage-in-clickhouse-server}
``` sql
ENGINE = GenerateRandom([random_seed] [,max_string_length] [,max_array_length])
ENGINE = GenerateRandom([random_seed [,max_string_length [,max_array_length]]])
```
The `max_array_length` and `max_string_length` parameters specify maximum length of all

View File

@ -1,7 +0,0 @@
position: 1
label: 'Example Datasets'
collapsible: true
collapsed: true
link:
type: doc
id: en/getting-started/example-datasets/

View File

@ -154,7 +154,7 @@ Arrays are written as a list of comma-separated values in square brackets. Numbe
In input data, ENUM values can be represented as names or as ids. First, we try to match the input value to the ENUM name. If we fail and the input value is a number, we try to match this number to ENUM id.
If input data contains only ENUM ids, it's recommended to enable the setting [input_format_tsv_enum_as_number](/docs/en/operations/settings/settings-formats.md/#input_format_tsv_enum_as_number) to optimize ENUM parsing.
Each element of [Nested](/docs/en/sql-reference/data-types/nested-data-structures/nested.md) structures is represented as an array.
Each element of [Nested](/docs/en/sql-reference/data-types/nested-data-structures/index.md) structures is represented as an array.
For example:
@ -1150,7 +1150,7 @@ Any set of bytes can be output in the strings. Use the `JSONEachRow` format if y
### Usage of Nested Structures {#jsoneachrow-nested}
If you have a table with [Nested](/docs/en/sql-reference/data-types/nested-data-structures/nested.md) data type columns, you can insert JSON data with the same structure. Enable this feature with the [input_format_import_nested_json](/docs/en/operations/settings/settings-formats.md/#input_format_import_nested_json) setting.
If you have a table with [Nested](/docs/en/sql-reference/data-types/nested-data-structures/index.md) data type columns, you can insert JSON data with the same structure. Enable this feature with the [input_format_import_nested_json](/docs/en/operations/settings/settings-formats.md/#input_format_import_nested_json) setting.
For example, consider the following table:
@ -1776,7 +1776,7 @@ message MessageType {
```
ClickHouse tries to find a column named `x.y.z` (or `x_y_z` or `X.y_Z` and so on).
Nested messages are suitable to input or output a [nested data structures](/docs/en/sql-reference/data-types/nested-data-structures/nested.md).
Nested messages are suitable for input or output of [nested data structures](/docs/en/sql-reference/data-types/nested-data-structures/index.md).
Default values defined in a protobuf schema like this
@ -1978,7 +1978,7 @@ To exchange data with Hadoop, you can use [HDFS table engine](/docs/en/engines/t
- [output_format_parquet_row_group_size](/docs/en/operations/settings/settings-formats.md/#output_format_parquet_row_group_size) - row group size in rows while data output. Default value - `1000000`.
- [output_format_parquet_string_as_string](/docs/en/operations/settings/settings-formats.md/#output_format_parquet_string_as_string) - use Parquet String type instead of Binary for String columns. Default value - `false`.
- [input_format_parquet_import_nested](/docs/en/operations/settings/settings-formats.md/#input_format_parquet_import_nested) - allow inserting array of structs into [Nested](/docs/en/sql-reference/data-types/nested-data-structures/nested.md) table in Parquet input format. Default value - `false`.
- [input_format_parquet_import_nested](/docs/en/operations/settings/settings-formats.md/#input_format_parquet_import_nested) - allow inserting array of structs into [Nested](/docs/en/sql-reference/data-types/nested-data-structures/index.md) table in Parquet input format. Default value - `false`.
- [input_format_parquet_case_insensitive_column_matching](/docs/en/operations/settings/settings-formats.md/#input_format_parquet_case_insensitive_column_matching) - ignore case when matching Parquet columns with ClickHouse columns. Default value - `false`.
- [input_format_parquet_allow_missing_columns](/docs/en/operations/settings/settings-formats.md/#input_format_parquet_allow_missing_columns) - allow missing columns while reading Parquet data. Default value - `false`.
- [input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference](/docs/en/operations/settings/settings-formats.md/#input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference) - allow skipping columns with unsupported types during schema inference for the Parquet format. Default value - `false`.

View File

@ -6,7 +6,7 @@ keywords: [clickhouse, network, interfaces, http, tcp, grpc, command-line, clien
description: ClickHouse provides three network interfaces
---
# Interfaces
# Drivers and Interfaces
ClickHouse provides three network interfaces (they can be optionally wrapped in TLS for additional security):

View File

@ -331,7 +331,7 @@ It is also possible to `BACKUP`/`RESTORE` to S3 by configuring an S3 disk in the
<s3>
<volumes>
<main>
<disk>s3</disk>
<disk>s3_plain</disk>
</main>
</volumes>
</s3>
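Once such a disk is configured, a backup can target it by name; a sketch where the table and backup names are placeholders:

``` sql
BACKUP TABLE data TO Disk('s3_plain', 'cloud_backup');
RESTORE TABLE data AS data_restored FROM Disk('s3_plain', 'cloud_backup');
```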

View File

@ -964,7 +964,7 @@ Default value: 1.
### input_format_arrow_import_nested {#input_format_arrow_import_nested}
Enables or disables the ability to insert the data into [Nested](../../sql-reference/data-types/nested-data-structures/nested.md) columns as an array of structs in [Arrow](../../interfaces/formats.md/#data_types-matching-arrow) input format.
Enables or disables the ability to insert the data into [Nested](../../sql-reference/data-types/nested-data-structures/index.md) columns as an array of structs in [Arrow](../../interfaces/formats.md/#data_types-matching-arrow) input format.
Possible values:
@ -1024,7 +1024,7 @@ Default value: `none`.
### input_format_orc_import_nested {#input_format_orc_import_nested}
Enables or disables the ability to insert the data into [Nested](../../sql-reference/data-types/nested-data-structures/nested.md) columns as an array of structs in [ORC](../../interfaces/formats.md/#data-format-orc) input format.
Enables or disables the ability to insert the data into [Nested](../../sql-reference/data-types/nested-data-structures/index.md) columns as an array of structs in [ORC](../../interfaces/formats.md/#data-format-orc) input format.
Possible values:
@ -1073,7 +1073,7 @@ Default value: `none`.
### input_format_parquet_import_nested {#input_format_parquet_import_nested}
Enables or disables the ability to insert the data into [Nested](../../sql-reference/data-types/nested-data-structures/nested.md) columns as an array of structs in [Parquet](../../interfaces/formats.md/#data-format-parquet) input format.
Enables or disables the ability to insert the data into [Nested](../../sql-reference/data-types/nested-data-structures/index.md) columns as an array of structs in [Parquet](../../interfaces/formats.md/#data-format-parquet) input format.
Possible values:
@ -1538,6 +1538,6 @@ Default value: `1GiB`.
### input_format_native_allow_types_conversion {#input_format_native_allow_types_conversion}
Allow types conversion in Native input format between columns from input data and requested columns.
Allow types conversion in Native input format between columns from input data and requested columns.
Enabled by default.

View File

@ -3438,7 +3438,7 @@ Default value: `throw`.
## flatten_nested {#flatten-nested}
Sets the data format of a [nested](../../sql-reference/data-types/nested-data-structures/nested.md) columns.
Sets the data format of [nested](../../sql-reference/data-types/nested-data-structures/index.md) columns.
Possible values:
@ -4049,3 +4049,32 @@ Possible values:
- 1 - enabled
Default value: `0`.
## stop_reading_on_first_cancel {#stop_reading_on_first_cancel}
When set to `true` and the user wants to interrupt a query (for example, using `Ctrl+C` on the client), the query continues execution only on data that has already been read from the table. Afterwards, it returns a partial result of the query for the part of the table that was read. To fully stop the execution of a query without a partial result, the user should send 2 cancel requests.
**Example without setting on Ctrl+C**
```sql
SELECT sum(number) FROM numbers(10000000000)
Cancelling query.
Ok.
Query was cancelled.
0 rows in set. Elapsed: 1.334 sec. Processed 52.65 million rows, 421.23 MB (39.48 million rows/s., 315.85 MB/s.)
```
**Example with setting on Ctrl+C**
```sql
SELECT sum(number) FROM numbers(10000000000) SETTINGS stop_reading_on_first_cancel=true
┌──────sum(number)─┐
│ 1355411451286266 │
└──────────────────┘
1 row in set. Elapsed: 1.331 sec. Processed 52.13 million rows, 417.05 MB (39.17 million rows/s., 313.33 MB/s.)
```
Possible values: `true`, `false`
Default value: `false`

View File

@ -1,7 +1,7 @@
---
slug: /en/operations/utilities/
sidebar_position: 56
sidebar_label: Utilities
sidebar_label: List of tools and utilities
pagination_next: 'en/operations/utilities/clickhouse-copier'
---

View File

@ -1,13 +1,33 @@
---
slug: /en/sql-reference/data-types/
sidebar_label: Data Types
sidebar_label: List of data types
sidebar_position: 37
---
# Data Types
# ClickHouse Data Types
ClickHouse can store various kinds of data in table cells.
ClickHouse can store various kinds of data in table cells. This section describes the supported data types and special considerations for using and/or implementing them if any.
This section describes the supported data types and special considerations for using and/or implementing them if any.
:::note
You can check whether a data type name is case-sensitive in the [system.data_type_families](../../operations/system-tables/data_type_families.md#system_tables-data_type_families) table.
:::
You can check whether data type name is case-sensitive in the [system.data_type_families](../../operations/system-tables/data_type_families.md#system_tables-data_type_families) table.
ClickHouse data types include:
- **Integer types**: [signed and unsigned integers](./int-uint.md) (`UInt8`, `UInt16`, `UInt32`, `UInt64`, `UInt128`, `UInt256`, `Int8`, `Int16`, `Int32`, `Int64`, `Int128`, `Int256`)
- **Floating-point numbers**: [floats](./float.md) (`Float32` and `Float64`) and [`Decimal` values](./decimal.md)
- **Boolean**: ClickHouse has a [`Boolean` type](./boolean.md)
- **Strings**: [`String`](./string.md) and [`FixedString`](./fixedstring.md)
- **Dates**: use [`Date`](./date.md) and [`Date32`](./date32.md) for days, and [`DateTime`](./datetime.md) and [`DateTime64`](./datetime64.md) for instances in time
- **JSON**: the [`JSON` object](./json.md) stores a JSON document in a single column
- **UUID**: a performant option for storing [`UUID` values](./uuid.md)
- **Low cardinality types**: use an [`Enum`](./enum.md) when you have a handful of unique values, or use [`LowCardinality`](./lowcardinality.md) when you have up to 10,000 unique values of a column
- **Arrays**: any column can be defined as an [`Array` of values](./array.md)
- **Maps**: use [`Map`](./map.md) for storing key/value pairs
- **Aggregation function types**: use [`SimpleAggregateFunction`](./simpleaggregatefunction.md) and [`AggregateFunction`](./aggregatefunction.md) for storing the intermediate status of aggregate function results
- **Nested data structures**: A [`Nested` data structure](./nested-data-structures/index.md) is like a table inside a cell
- **Tuples**: A [`Tuple` of elements](./tuple.md), each having an individual type.
- **Nullable**: [`Nullable`](./nullable.md) allows you to store a value as `NULL` when a value is "missing" (instead of the column getting its default value for the data type)
- **IP addresses**: use [`IPv4`](./domains/ipv4.md) and [`IPv6`](./domains/ipv6.md) to efficiently store IP addresses
- **Geo types**: for [geographical data](./geo.md), including `Point`, `Ring`, `Polygon` and `MultiPolygon`
- **Special data types**: including [`Expression`](./special-data-types/expression.md), [`Set`](./special-data-types/set.md), [`Nothing`](./special-data-types/nothing.md) and [`Interval`](./special-data-types/interval.md)
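As a quick illustration, a hypothetical table combining several of the types listed above:

``` sql
CREATE TABLE types_example
(
    id UInt64,
    price Decimal(18, 2),
    created DateTime,
    tags Array(LowCardinality(String)),
    attributes Map(String, String),
    comment Nullable(String)
)
ENGINE = MergeTree
ORDER BY id;
```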

View File

@ -1,7 +1,105 @@
---
slug: /en/sql-reference/data-types/nested-data-structures/
sidebar_label: Nested Data Structures
sidebar_position: 54
slug: /en/sql-reference/data-types/nested-data-structures/nested
sidebar_position: 57
sidebar_label: Nested(Name1 Type1, Name2 Type2, ...)
---
# Nested Data Structures
# Nested
## Nested(name1 Type1, Name2 Type2, …)
A nested data structure is like a table inside a cell. The parameters of a nested data structure (the column names and types) are specified the same way as in a [CREATE TABLE](../../../sql-reference/statements/create/table.md) query. Each table row can correspond to any number of rows in a nested data structure.
Example:
``` sql
CREATE TABLE test.visits
(
CounterID UInt32,
StartDate Date,
Sign Int8,
IsNew UInt8,
VisitID UInt64,
UserID UInt64,
...
Goals Nested
(
ID UInt32,
Serial UInt32,
EventTime DateTime,
Price Int64,
OrderID String,
CurrencyID UInt32
),
...
) ENGINE = CollapsingMergeTree(StartDate, intHash32(UserID), (CounterID, StartDate, intHash32(UserID), VisitID), 8192, Sign)
```
This example declares the `Goals` nested data structure, which contains data about conversions (goals reached). Each row in the visits table can correspond to zero or any number of conversions.
When [flatten_nested](../../../operations/settings/settings.md#flatten-nested) is set to `0` (which is not the default), arbitrary levels of nesting are supported.
In most cases, when working with a nested data structure, its columns are specified with column names separated by a dot. These columns make up an array of matching types. All the column arrays of a single nested data structure have the same length.
Example:
``` sql
SELECT
Goals.ID,
Goals.EventTime
FROM test.visits
WHERE CounterID = 101500 AND length(Goals.ID) < 5
LIMIT 10
```
``` text
┌─Goals.ID───────────────────────┬─Goals.EventTime───────────────────────────────────────────────────────────────────────────┐
│ [1073752,591325,591325] │ ['2014-03-17 16:38:10','2014-03-17 16:38:48','2014-03-17 16:42:27'] │
│ [1073752] │ ['2014-03-17 00:28:25'] │
│ [1073752] │ ['2014-03-17 10:46:20'] │
│ [1073752,591325,591325,591325] │ ['2014-03-17 13:59:20','2014-03-17 22:17:55','2014-03-17 22:18:07','2014-03-17 22:18:51'] │
│ [] │ [] │
│ [1073752,591325,591325] │ ['2014-03-17 11:37:06','2014-03-17 14:07:47','2014-03-17 14:36:21'] │
│ [] │ [] │
│ [] │ [] │
│ [591325,1073752] │ ['2014-03-17 00:46:05','2014-03-17 00:46:05'] │
│ [1073752,591325,591325,591325] │ ['2014-03-17 13:28:33','2014-03-17 13:30:26','2014-03-17 18:51:21','2014-03-17 18:51:45'] │
└────────────────────────────────┴───────────────────────────────────────────────────────────────────────────────────────────┘
```
It is easiest to think of a nested data structure as a set of multiple column arrays of the same length.
The only place where a SELECT query can specify the name of an entire nested data structure instead of individual columns is the ARRAY JOIN clause. For more information, see “ARRAY JOIN clause”. Example:
``` sql
SELECT
Goal.ID,
Goal.EventTime
FROM test.visits
ARRAY JOIN Goals AS Goal
WHERE CounterID = 101500 AND length(Goals.ID) < 5
LIMIT 10
```
``` text
┌─Goal.ID─┬──────Goal.EventTime─┐
│ 1073752 │ 2014-03-17 16:38:10 │
│ 591325 │ 2014-03-17 16:38:48 │
│ 591325 │ 2014-03-17 16:42:27 │
│ 1073752 │ 2014-03-17 00:28:25 │
│ 1073752 │ 2014-03-17 10:46:20 │
│ 1073752 │ 2014-03-17 13:59:20 │
│ 591325 │ 2014-03-17 22:17:55 │
│ 591325 │ 2014-03-17 22:18:07 │
│ 591325 │ 2014-03-17 22:18:51 │
│ 1073752 │ 2014-03-17 11:37:06 │
└─────────┴─────────────────────┘
```
You can't perform a SELECT for an entire nested data structure. You can only explicitly list individual columns that are part of it.
For an INSERT query, you should pass all the component column arrays of a nested data structure separately (as if they were individual column arrays). During insertion, the system checks that they have the same length.
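A sketch of such an INSERT against a hypothetical table with a single Nested column (not the `test.visits` table above):

``` sql
CREATE TABLE nested_insert_example
(
    id UInt64,
    goals Nested(ID UInt32, EventTime DateTime)
)
ENGINE = MergeTree
ORDER BY id;

-- Each component column is passed as its own array; both arrays must have the same length.
INSERT INTO nested_insert_example (id, `goals.ID`, `goals.EventTime`)
VALUES (1, [1073752, 591325], ['2014-03-17 16:38:10', '2014-03-17 16:38:48']);
```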
For a DESCRIBE query, the columns in a nested data structure are listed separately in the same way.
The ALTER query for elements in a nested data structure has limitations.

View File

@ -1,105 +0,0 @@
---
slug: /en/sql-reference/data-types/nested-data-structures/nested
sidebar_position: 57
sidebar_label: Nested(Name1 Type1, Name2 Type2, ...)
---
# Nested
## Nested(name1 Type1, Name2 Type2, …)
A nested data structure is like a table inside a cell. The parameters of a nested data structure the column names and types are specified the same way as in a [CREATE TABLE](../../../sql-reference/statements/create/table.md) query. Each table row can correspond to any number of rows in a nested data structure.
Example:
``` sql
CREATE TABLE test.visits
(
CounterID UInt32,
StartDate Date,
Sign Int8,
IsNew UInt8,
VisitID UInt64,
UserID UInt64,
...
Goals Nested
(
ID UInt32,
Serial UInt32,
EventTime DateTime,
Price Int64,
OrderID String,
CurrencyID UInt32
),
...
) ENGINE = CollapsingMergeTree(StartDate, intHash32(UserID), (CounterID, StartDate, intHash32(UserID), VisitID), 8192, Sign)
```
This example declares the `Goals` nested data structure, which contains data about conversions (goals reached). Each row in the visits table can correspond to zero or any number of conversions.
When [flatten_nested](../../../operations/settings/settings.md#flatten-nested) is set to `0` (which is not by default), arbitrary levels of nesting are supported.
In most cases, when working with a nested data structure, its columns are specified with column names separated by a dot. These columns make up an array of matching types. All the column arrays of a single nested data structure have the same length.
Example:
``` sql
SELECT
Goals.ID,
Goals.EventTime
FROM test.visits
WHERE CounterID = 101500 AND length(Goals.ID) < 5
LIMIT 10
```
``` text
┌─Goals.ID───────────────────────┬─Goals.EventTime───────────────────────────────────────────────────────────────────────────┐
│ [1073752,591325,591325] │ ['2014-03-17 16:38:10','2014-03-17 16:38:48','2014-03-17 16:42:27'] │
│ [1073752] │ ['2014-03-17 00:28:25'] │
│ [1073752] │ ['2014-03-17 10:46:20'] │
│ [1073752,591325,591325,591325] │ ['2014-03-17 13:59:20','2014-03-17 22:17:55','2014-03-17 22:18:07','2014-03-17 22:18:51'] │
│ [] │ [] │
│ [1073752,591325,591325] │ ['2014-03-17 11:37:06','2014-03-17 14:07:47','2014-03-17 14:36:21'] │
│ [] │ [] │
│ [] │ [] │
│ [591325,1073752] │ ['2014-03-17 00:46:05','2014-03-17 00:46:05'] │
│ [1073752,591325,591325,591325] │ ['2014-03-17 13:28:33','2014-03-17 13:30:26','2014-03-17 18:51:21','2014-03-17 18:51:45'] │
└────────────────────────────────┴───────────────────────────────────────────────────────────────────────────────────────────┘
```
It is easiest to think of a nested data structure as a set of multiple column arrays of the same length.
The only place where a SELECT query can specify the name of an entire nested data structure instead of individual columns is the ARRAY JOIN clause. For more information, see “ARRAY JOIN clause”. Example:
``` sql
SELECT
Goal.ID,
Goal.EventTime
FROM test.visits
ARRAY JOIN Goals AS Goal
WHERE CounterID = 101500 AND length(Goals.ID) < 5
LIMIT 10
```
``` text
┌─Goal.ID─┬──────Goal.EventTime─┐
│ 1073752 │ 2014-03-17 16:38:10 │
│ 591325 │ 2014-03-17 16:38:48 │
│ 591325 │ 2014-03-17 16:42:27 │
│ 1073752 │ 2014-03-17 00:28:25 │
│ 1073752 │ 2014-03-17 10:46:20 │
│ 1073752 │ 2014-03-17 13:59:20 │
│ 591325 │ 2014-03-17 22:17:55 │
│ 591325 │ 2014-03-17 22:18:07 │
│ 591325 │ 2014-03-17 22:18:51 │
│ 1073752 │ 2014-03-17 11:37:06 │
└─────────┴─────────────────────┘
```
You cant perform SELECT for an entire nested data structure. You can only explicitly list individual columns that are part of it.
For an INSERT query, you should pass all the component column arrays of a nested data structure separately (as if they were individual column arrays). During insertion, the system checks that they have the same length.
For a DESCRIBE query, the columns in a nested data structure are listed separately in the same way.
The ALTER query for elements in a nested data structure has limitations.

View File

@ -1232,12 +1232,14 @@ SELECT timeSlots(toDateTime64('1980-12-12 21:01:02.1234', 4, 'UTC'), toDecimal64
└───────────────────────────────────────────────────────────────────────────────────────────────────────────┘
```
## formatDateTime
## formatDateTime {#date_time_functions-formatDateTime}
Formats a date or date with time according to the given format string. The format is a constant expression, so you cannot have multiple formats for a single result column.
formatDateTime uses the MySQL datetime format style; refer to https://dev.mysql.com/doc/refman/8.0/en/date-and-time-functions.html#function_date-format.
The opposite operation of this function is [parseDateTime](/docs/en/sql-reference/functions/type-conversion-functions.md#type_conversion_functions-parseDateTime).
Alias: `DATE_FORMAT`.
**Syntax**
@ -1257,7 +1259,7 @@ Using replacement fields, you can define a pattern for the resulting string. “
|----------|---------------------------------------------------------|------------|
| %a | abbreviated weekday name (Mon-Sun) | Mon |
| %b | abbreviated month name (Jan-Dec) | Jan |
| %c | month as a decimal number (01-12) | 01 |
| %c | month as an integer number (01-12) | 01 |
| %C | year divided by 100 and truncated to integer (00-99) | 20 |
| %d | day of the month, zero-padded (01-31) | 02 |
| %D | Short MM/DD/YY date, equivalent to %m/%d/%y | 01/02/18 |
@ -1273,7 +1275,7 @@ Using replacement fields, you can define a pattern for the resulting string. “
| %j | day of the year (001-366) | 002 |
| %k | hour in 24h format (00-23) | 22 |
| %l | hour in 12h format (01-12) | 09 |
| %m | month as a decimal number (01-12) | 01 |
| %m | month as an integer number (01-12) | 01 |
| %M | minute (00-59) | 33 |
| %n | new-line character () | |
| %p | AM or PM designation | PM |
@ -1286,7 +1288,7 @@ Using replacement fields, you can define a pattern for the resulting string. “
| %T | ISO 8601 time format (HH:MM:SS), equivalent to %H:%M:%S | 22:33:44 |
| %u | ISO 8601 weekday as number with Monday as 1 (1-7) | 2 |
| %V | ISO 8601 week number (01-53) | 01 |
| %w | weekday as a decimal number with Sunday as 0 (0-6) | 2 |
| %w | weekday as an integer number with Sunday as 0 (0-6) | 2 |
| %W | full weekday name (Monday-Sunday) | Monday |
| %y | Year, last two digits (00-99) | 18 |
| %Y | Year | 2018 |
@ -1328,10 +1330,11 @@ Result:
- [formatDateTimeInJodaSyntax](##formatDateTimeInJodaSyntax)
## formatDateTimeInJodaSyntax
## formatDateTimeInJodaSyntax {#date_time_functions-formatDateTimeInJodaSyntax}
Similar to formatDateTime, except that it formats datetime in Joda style instead of MySQL style. Refer to https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html.
The opposite operation of this function is [parseDateTimeInJodaSyntax](/docs/en/sql-reference/functions/type-conversion-functions.md#type_conversion_functions-parseDateTimeInJodaSyntax).
**Replacement fields**

View File

@ -375,7 +375,7 @@ For a case-insensitive search or/and in UTF-8 format use functions `multiSearchA
In all `multiSearch*` functions the number of needles should be less than 2<sup>8</sup> because of implementation specification.
:::
## match(haystack, pattern)
## match(haystack, pattern), haystack REGEXP pattern operator
Checks whether string `haystack` matches the regular expression `pattern`. The pattern is an [re2 regular expression](https://github.com/google/re2/wiki/Syntax) which has a more limited syntax than Perl regular expressions.

View File

@ -1148,6 +1148,85 @@ Result:
└───────────────────────────┴──────────────────────────────┘
```
## parseDateTime {#type_conversion_functions-parseDateTime}
Converts a [String](/docs/en/sql-reference/data-types/string.md) to [DateTime](/docs/en/sql-reference/data-types/datetime.md) according to a [MySQL format string](https://dev.mysql.com/doc/refman/8.0/en/date-and-time-functions.html#function_date-format).
This function is the opposite operation of function [formatDateTime](/docs/en/sql-reference/functions/date-time-functions.md#date_time_functions-formatDateTime).
**Syntax**
``` sql
parseDateTime(str, format[, timezone])
```
**Arguments**
- `str` — the String to be parsed
- `format` — the format string
- `timezone` — [Timezone](/docs/en/operations/server-configuration-parameters/settings.md/#server_configuration_parameters-timezone). Optional.
**Returned value(s)**
Returns DateTime values parsed from the input string according to a MySQL-style format string.
**Supported format specifiers**
All format specifiers listed in [formatDateTime](/docs/en/sql-reference/functions/date-time-functions.md#date_time_functions-formatDateTime) except:
- %f: fractional second
- %Q: Quarter (1-4)
**Example**
``` sql
SELECT parseDateTime('2021-01-04+23:00:00', '%Y-%m-%d+%H:%i:%s')
┌─parseDateTime('2021-01-04+23:00:00', '%Y-%m-%d+%H:%i:%s')─┐
│ 2021-01-04 23:00:00 │
└───────────────────────────────────────────────────────────┘
```
Alias: `TO_TIMESTAMP`.
## parseDateTimeInJodaSyntax {#type_conversion_functions-parseDateTimeInJodaSyntax}
Similar to [parseDateTime](#parsedatetime), except that the format string is in [Joda](https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html) instead of MySQL syntax.
This function is the opposite operation of function [formatDateTimeInJodaSyntax](/docs/en/sql-reference/functions/date-time-functions.md#date_time_functions-formatDateTimeInJodaSyntax).
**Syntax**
``` sql
parseDateTimeInJodaSyntax(str, format[, timezone])
```
**Arguments**
- `str` — the String to be parsed
- `format` — the format string
- `timezone` — [Timezone](/docs/en/operations/server-configuration-parameters/settings.md/#server_configuration_parameters-timezone). Optional.
**Returned value(s)**
Returns DateTime values parsed from the input string according to a Joda-style format string.
**Supported format specifiers**
All format specifiers listed in [formatDateTimeInJodaSyntax](/docs/en/sql-reference/functions/date-time-functions.md#date_time_functions-formatDateTimeInJodaSyntax) are supported, except:
- S: fraction of second
- z: time zone
- Z: time zone offset/id
**Example**
``` sql
SELECT parseDateTimeInJodaSyntax('2023-02-24 14:53:31', 'yyyy-MM-dd HH:mm:ss', 'Europe/Minsk')
┌─parseDateTimeInJodaSyntax('2023-02-24 14:53:31', 'yyyy-MM-dd HH:mm:ss', 'Europe/Minsk')─┐
│ 2023-02-24 14:53:31 │
└─────────────────────────────────────────────────────────────────────────────────────────┘
```
## parseDateTimeBestEffort
## parseDateTime32BestEffort
@ -1351,7 +1430,6 @@ Same as for [parseDateTime64BestEffort](#parsedatetime64besteffort), except that
Same as for [parseDateTime64BestEffort](#parsedatetime64besteffort), except that this function prefers US date format (`MM/DD/YYYY` etc.) in case of ambiguity and returns zero date or zero date time when it encounters a date format that cannot be processed.
## toLowCardinality
Converts the input parameter to the [LowCardinality](/docs/en/sql-reference/data-types/lowcardinality.md) version of the same data type.

View File

@ -381,8 +381,8 @@ High compression levels are useful for asymmetric scenarios, like compress once,
`DEFLATE_QPL` — [Deflate compression algorithm](https://github.com/intel/qpl) implemented by Intel® Query Processing Library. Some limitations apply:
- DEFLATE_QPL is experimental and can only be used after setting configuration parameter `allow_experimental_codecs=1`.
- DEFLATE_QPL only works if ClickHouse was compiled with support for AVX2 or AVX512 instructions
- DEFLATE_QPL works best if the system has a Intel® IAA (In-Memory Analytics Accelerator) offloading device
- DEFLATE_QPL only works if ClickHouse was compiled with support for AVX2 or AVX512 instructions. Refer to [Build ClickHouse with DEFLATE_QPL](/docs/en/development/building_and_benchmarking_deflate_qpl.md/#Build-Clickhouse-with-DEFLATE_QPL) for more details.
- DEFLATE_QPL works best if the system has an Intel® IAA (In-Memory Analytics Accelerator) offloading device. Refer to [Accelerator Configuration](https://intel.github.io/qpl/documentation/get_started_docs/installation.html#accelerator-configuration) and [Benchmark with DEFLATE_QPL](/docs/en/development/building_and_benchmarking_deflate_qpl.md/#Run-Benchmark-with-DEFLATE_QPL) for more details.
- DEFLATE_QPL-compressed data can only be transferred between ClickHouse nodes compiled with support for AVX2/AVX512
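A minimal sketch of declaring a column with this codec, assuming a QPL-enabled build and the experimental codecs setting; names are illustrative:

``` sql
SET allow_experimental_codecs = 1;

CREATE TABLE qpl_example
(
    ts DateTime,
    payload String CODEC(DEFLATE_QPL)
)
ENGINE = MergeTree
ORDER BY ts;
```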
### Specialized Codecs

View File

@ -24,9 +24,9 @@ The `DESCRIBE` statement returns a row for each table column with the following
- `ttl_expression` — A [TTL](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-ttl) expression.
- `is_subcolumn` — A flag that equals `1` for internal subcolumns. It is included into the result only if subcolumn description is enabled by the [describe_include_subcolumns](../../operations/settings/settings.md#describe_include_subcolumns) setting.
All columns in [Nested](../../sql-reference/data-types/nested-data-structures/nested.md) data structures are described separately. The name of each column is prefixed with a parent column name and a dot.
All columns in [Nested](../../sql-reference/data-types/nested-data-structures/index.md) data structures are described separately. The name of each column is prefixed with a parent column name and a dot.
To show internal subcolumns of other data types, use the [describe_include_subcolumns](../../operations/settings/settings.md#describe_include_subcolumns) setting.
To show internal subcolumns of other data types, use the [describe_include_subcolumns](../../operations/settings/settings.md#describe_include_subcolumns) setting.
**Example**

View File

@ -1,10 +1,10 @@
---
slug: /en/sql-reference/statements/
sidebar_position: 1
sidebar_label: Statements
sidebar_label: List of statements
---
# ClickHouse SQL Statements
# ClickHouse SQL Statements
Statements represent various kinds of actions you can perform using SQL queries. Each kind of statement has its own syntax and usage details that are described separately:

View File

@ -185,7 +185,7 @@ SETTINGS enable_unaligned_array_join = 1;
## ARRAY JOIN with Nested Data Structure
`ARRAY JOIN` also works with [nested data structures](../../../sql-reference/data-types/nested-data-structures/nested.md):
`ARRAY JOIN` also works with [nested data structures](../../../sql-reference/data-types/nested-data-structures/index.md):
``` sql
CREATE TABLE nested_test

View File

@ -4084,3 +4084,32 @@ ALTER TABLE test FREEZE SETTINGS alter_partition_verbose_result = 1;
Sets the character that is interpreted as a suffix after the result set for the [CustomSeparated](../../interfaces/formats.md#format-customseparated) format.
Default value: `''`.
## stop_reading_on_first_cancel {#stop_reading_on_first_cancel}
If set to `true` and the user wants to interrupt a query (for example, using `Ctrl+C` on the client), the query continues execution only on data that has already been read from the table. Afterwards, it returns a partial result of the query for the part of the table that was read. To fully stop the execution of a query without a partial result, the user should send 2 cancel requests.
**Example with the setting disabled, on Ctrl+C**
```sql
SELECT sum(number) FROM numbers(10000000000)
Cancelling query.
Ok.
Query was cancelled.
0 rows in set. Elapsed: 1.334 sec. Processed 52.65 million rows, 421.23 MB (39.48 million rows/s., 315.85 MB/s.)
```
**Example with the setting enabled, on Ctrl+C**
```sql
SELECT sum(number) FROM numbers(10000000000) SETTINGS stop_reading_on_first_cancel=true
┌──────sum(number)─┐
│ 1355411451286266 │
└──────────────────┘
1 row in set. Elapsed: 1.331 sec. Processed 52.13 million rows, 417.05 MB (39.17 million rows/s., 313.33 MB/s.)
```
Possible values: `true`, `false`
Default value: `false`

View File

@ -97,7 +97,7 @@ CREATE DATABASE mysql ENGINE = MaterializeMySQL('localhost:3306', 'db', 'user',
### DDL查询 {#ddl-queries}
MySQL DDL queries are converted into the corresponding ClickHouse DDL queries ([ALTER](../../sql-reference/statements/alter/index.md), [CREATE](../../sql-reference/statements/create/index.md), [DROP](../../sql-reference/statements/drop.md), [RENAME](../../sql-reference/statements/rename.md)). If ClickHouse cannot parse a DDL query, the query is ignored.
MySQL DDL queries are converted into the corresponding ClickHouse DDL queries ([ALTER](../../sql-reference/statements/alter/index.md), [CREATE](../../sql-reference/statements/create.md), [DROP](../../sql-reference/statements/drop.md), [RENAME](../../sql-reference/statements/rename.md)). If ClickHouse cannot parse a DDL query, the query is ignored.
### Data Replication {#data-replication}

View File

@ -109,7 +109,7 @@ MySQL中的Time 类型会被ClickHouse转换成微秒来存储
### DDL Queries {#ddl-queries}
MySQL DDL statements are converted into the corresponding ClickHouse DDL statements, for example: ([ALTER](../../sql-reference/statements/alter/index.md), [CREATE](../../sql-reference/statements/create/index.md), [DROP](../../sql-reference/statements/drop.md), [RENAME](../../sql-reference/statements/rename.md)). If ClickHouse cannot parse a DDL statement, the operation is skipped.
MySQL DDL statements are converted into the corresponding ClickHouse DDL statements, for example: ([ALTER](../../sql-reference/statements/alter/index.md), [CREATE](../../sql-reference/statements/create.md), [DROP](../../sql-reference/statements/drop.md), [RENAME](../../sql-reference/statements/rename.md)). If ClickHouse cannot parse a DDL statement, the operation is skipped.
### 数据复制 {#data-replication}

View File

@ -1,5 +1,5 @@
---
slug: /zh/faq/general
slug: /zh/faq/general/overview
---
# Frequently Asked Questions {#chang-jian-wen-ti}

View File

@ -21,8 +21,7 @@ sidebar_label: General
- [How do I contribute code to ClickHouse?](../../faq/general/how-do-i-contribute-code-to-clickhouse.md)
!!! info "Didn't find what you were looking for?"
请查阅 See the [other F.A.Q. categories](../../faq/) or browse the rest of the documentation in the left sidebar
See the [other F.A.Q. categories](../../faq/index.md) or browse the rest of the documentation in the left sidebar
{## [原始文档](https://clickhouse.com/docs/en/faq/general/) ##}

View File

@ -338,6 +338,12 @@ UserID.binURL.bin和EventTime.bin是<font face = "monospace">UserID</font>
:::note
- The last granule (granule 1082) has fewer than 8192 rows.
- As mentioned in the "DDL statement details" at the beginning of this guide, we disabled adaptive index granularity (to simplify the discussion in this guide and make the diagrams and results reproducible).
Therefore, all granules in the example table (except the last one) have the same size.
- For tables with adaptive index granularity (index granularity is adaptive by default), the size of some granules can be less than 8192 rows, depending on the row data size.
- We marked some column values from the primary key columns (<font face = "monospace">UserID</font>, <font face = "monospace">URL</font>) in orange.
These orange-marked column values are the minimum value of each primary key column in each granule. The exception is the last granule (granule 1082 in the diagram above), for which we marked the maximum values.

View File

@ -1,10 +0,0 @@
---
slug: /zh/sql-reference/functions/geo/
sidebar_label: Geo
sidebar_position: 62
title: "Geo Functions"
---
import Content from '@site/docs/en/sql-reference/functions/geo/index.md';
<Content />

View File

@ -1,5 +1,5 @@
---
slug: /zh/sql-reference/statements/alter/
slug: /zh/sql-reference/statements/alter/overview
sidebar_position: 35
sidebar_label: ALTER
---

View File

@ -1,11 +0,0 @@
---
slug: /zh/sql-reference/statements/create/
sidebar_label: CREATE
sidebar_position: 34
---
# CREATE Queries {#create-queries}
CREATE queries include the following subsets:
- [DATABASE](../../../sql-reference/statements/create/database.md)

View File

@ -10,7 +10,7 @@ sidebar_position: 31
- [SELECT](../../sql-reference/statements/select/index.md)
- [INSERT INTO](../../sql-reference/statements/insert-into.md)
- [CREATE](../../sql-reference/statements/create/index.md)
- [CREATE](../../sql-reference/statements/create.md)
- [ALTER](../../sql-reference/statements/alter/index.md)
- [SYSTEM](../../sql-reference/statements/system.md)
- [SHOW](../../sql-reference/statements/show.md)

View File

@ -222,6 +222,8 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
("pid-path", po::value<std::string>()->default_value("var/run/clickhouse-server"), "directory for pid file")
("user", po::value<std::string>()->default_value(DEFAULT_CLICKHOUSE_SERVER_USER), "clickhouse user to create")
("group", po::value<std::string>()->default_value(DEFAULT_CLICKHOUSE_SERVER_GROUP), "clickhouse group to create")
("noninteractive,y", "run non-interactively")
("link", "create symlink to the binary instead of copying to binary-path")
;
po::variables_map options;
@ -267,8 +269,6 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
/// Copy binary to the destination directory.
/// TODO An option to link instead of copy - useful for developers.
fs::path prefix = options["prefix"].as<std::string>();
fs::path bin_dir = prefix / options["binary-path"].as<std::string>();
@ -281,76 +281,129 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
bool old_binary_exists = fs::exists(main_bin_path);
bool already_installed = false;
/// Check if the binary is the same file (already installed).
if (old_binary_exists && binary_self_canonical_path == fs::canonical(main_bin_path))
if (options.count("link"))
{
already_installed = true;
fmt::print("ClickHouse binary is already located at {}\n", main_bin_path.string());
}
/// Check if binary has the same content.
else if (old_binary_exists && binary_size == fs::file_size(main_bin_path))
{
fmt::print("Found already existing ClickHouse binary at {} having the same size. Will check its contents.\n",
main_bin_path.string());
if (filesEqual(binary_self_path.string(), main_bin_path.string()))
if (old_binary_exists)
{
already_installed = true;
fmt::print("ClickHouse binary is already located at {} and it has the same content as {}\n",
main_bin_path.string(), binary_self_canonical_path.string());
}
}
bool is_symlink = FS::isSymlink(main_bin_path);
fs::path points_to;
if (is_symlink)
points_to = fs::weakly_canonical(FS::readSymlink(main_bin_path));
if (already_installed)
{
if (0 != chmod(main_bin_path.string().c_str(), S_IRUSR | S_IRGRP | S_IROTH | S_IXUSR | S_IXGRP | S_IXOTH))
throwFromErrno(fmt::format("Cannot chmod {}", main_bin_path.string()), ErrorCodes::SYSTEM_ERROR);
if (is_symlink && points_to == binary_self_canonical_path)
{
already_installed = true;
}
else
{
if (!is_symlink)
{
fmt::print("File {} already exists but it's not a symlink. Will rename to {}.\n",
main_bin_path.string(), main_bin_old_path.string());
fs::rename(main_bin_path, main_bin_old_path);
}
else if (points_to != main_bin_path)
{
fmt::print("Symlink {} already exists but it points to {}. Will replace the old symlink to {}.\n",
main_bin_path.string(), points_to.string(), binary_self_canonical_path.string());
fs::remove(main_bin_path);
}
}
}
if (!already_installed)
{
if (!fs::exists(bin_dir))
{
fmt::print("Creating binary directory {}.\n", bin_dir.string());
fs::create_directories(bin_dir);
}
fmt::print("Creating symlink {} to {}.\n", main_bin_path.string(), binary_self_canonical_path.string());
fs::create_symlink(binary_self_canonical_path, main_bin_path);
if (0 != chmod(binary_self_canonical_path.string().c_str(), S_IRUSR | S_IRGRP | S_IROTH | S_IXUSR | S_IXGRP | S_IXOTH))
throwFromErrno(fmt::format("Cannot chmod {}", binary_self_canonical_path.string()), ErrorCodes::SYSTEM_ERROR);
}
}
else
{
if (!fs::exists(bin_dir))
bool is_symlink = FS::isSymlink(main_bin_path);
if (!is_symlink)
{
fmt::print("Creating binary directory {}.\n", bin_dir.string());
fs::create_directories(bin_dir);
/// Check if the binary is the same file (already installed).
if (old_binary_exists && binary_self_canonical_path == fs::canonical(main_bin_path))
{
already_installed = true;
fmt::print("ClickHouse binary is already located at {}\n", main_bin_path.string());
}
/// Check if binary has the same content.
else if (old_binary_exists && binary_size == fs::file_size(main_bin_path))
{
fmt::print("Found already existing ClickHouse binary at {} having the same size. Will check its contents.\n",
main_bin_path.string());
if (filesEqual(binary_self_path.string(), main_bin_path.string()))
{
already_installed = true;
fmt::print("ClickHouse binary is already located at {} and it has the same content as {}\n",
main_bin_path.string(), binary_self_canonical_path.string());
}
}
}
size_t available_space = fs::space(bin_dir).available;
if (available_space < binary_size)
throw Exception(ErrorCodes::NOT_ENOUGH_SPACE, "Not enough space for clickhouse binary in {}, required {}, available {}.",
bin_dir.string(), ReadableSize(binary_size), ReadableSize(available_space));
fmt::print("Copying ClickHouse binary to {}\n", main_bin_tmp_path.string());
try
if (already_installed)
{
ReadBufferFromFile in(binary_self_path.string());
WriteBufferFromFile out(main_bin_tmp_path.string());
copyData(in, out);
out.sync();
if (0 != fchmod(out.getFD(), S_IRUSR | S_IRGRP | S_IROTH | S_IXUSR | S_IXGRP | S_IXOTH))
throwFromErrno(fmt::format("Cannot chmod {}", main_bin_tmp_path.string()), ErrorCodes::SYSTEM_ERROR);
out.finalize();
if (0 != chmod(main_bin_path.string().c_str(), S_IRUSR | S_IRGRP | S_IROTH | S_IXUSR | S_IXGRP | S_IXOTH))
throwFromErrno(fmt::format("Cannot chmod {}", main_bin_path.string()), ErrorCodes::SYSTEM_ERROR);
}
catch (const Exception & e)
else
{
if (e.code() == ErrorCodes::CANNOT_OPEN_FILE && geteuid() != 0)
std::cerr << "Install must be run as root: " << formatWithSudo("./clickhouse install") << '\n';
throw;
if (!fs::exists(bin_dir))
{
fmt::print("Creating binary directory {}.\n", bin_dir.string());
fs::create_directories(bin_dir);
}
size_t available_space = fs::space(bin_dir).available;
if (available_space < binary_size)
throw Exception(ErrorCodes::NOT_ENOUGH_SPACE, "Not enough space for clickhouse binary in {}, required {}, available {}.",
bin_dir.string(), ReadableSize(binary_size), ReadableSize(available_space));
fmt::print("Copying ClickHouse binary to {}\n", main_bin_tmp_path.string());
try
{
ReadBufferFromFile in(binary_self_path.string());
WriteBufferFromFile out(main_bin_tmp_path.string());
copyData(in, out);
out.sync();
if (0 != fchmod(out.getFD(), S_IRUSR | S_IRGRP | S_IROTH | S_IXUSR | S_IXGRP | S_IXOTH))
throwFromErrno(fmt::format("Cannot chmod {}", main_bin_tmp_path.string()), ErrorCodes::SYSTEM_ERROR);
out.finalize();
}
catch (const Exception & e)
{
if (e.code() == ErrorCodes::CANNOT_OPEN_FILE && geteuid() != 0)
std::cerr << "Install must be run as root: " << formatWithSudo("./clickhouse install") << '\n';
throw;
}
if (old_binary_exists)
{
fmt::print("{} already exists, will rename existing binary to {} and put the new binary in place\n",
main_bin_path.string(), main_bin_old_path.string());
/// There is file exchange operation in Linux but it's not portable.
fs::rename(main_bin_path, main_bin_old_path);
}
fmt::print("Renaming {} to {}.\n", main_bin_tmp_path.string(), main_bin_path.string());
fs::rename(main_bin_tmp_path, main_bin_path);
}
if (old_binary_exists)
{
fmt::print("{} already exists, will rename existing binary to {} and put the new binary in place\n",
main_bin_path.string(), main_bin_old_path.string());
/// There is file exchange operation in Linux but it's not portable.
fs::rename(main_bin_path, main_bin_old_path);
}
fmt::print("Renaming {} to {}.\n", main_bin_tmp_path.string(), main_bin_path.string());
fs::rename(main_bin_tmp_path, main_bin_path);
}
/// Create symlinks.
@ -384,7 +437,7 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
if (is_symlink)
points_to = fs::weakly_canonical(FS::readSymlink(symlink_path));
if (is_symlink && points_to == main_bin_path)
if (is_symlink && (points_to == main_bin_path || (options.count("link") && points_to == binary_self_canonical_path)))
{
need_to_create = false;
}
@ -709,7 +762,9 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
/// dpkg or apt installers can ask for non-interactive work explicitly.
const char * debian_frontend_var = getenv("DEBIAN_FRONTEND"); // NOLINT(concurrency-mt-unsafe)
bool noninteractive = debian_frontend_var && debian_frontend_var == std::string_view("noninteractive");
bool noninteractive = (debian_frontend_var && debian_frontend_var == std::string_view("noninteractive"))
|| options.count("noninteractive");
bool is_interactive = !noninteractive && stdin_is_a_tty && stdout_is_a_tty;

View File

@ -703,6 +703,9 @@
actions of previous constraint (defined in other profiles) for the same specific setting, including fields that are not set by new constraint.
It also enables 'changeable_in_readonly' constraint type -->
<settings_constraints_replace_previous>false</settings_constraints_replace_previous>
<!-- Number of seconds since last access a role is stored in the Role Cache -->
<role_cache_expiration_time_seconds>600</role_cache_expiration_time_seconds>
</access_control_improvements>
<!-- Default profile of settings. -->

View File

@ -247,7 +247,7 @@ private:
AccessControl::AccessControl()
: MultipleAccessStorage("user directories"),
context_access_cache(std::make_unique<ContextAccessCache>(*this)),
role_cache(std::make_unique<RoleCache>(*this)),
role_cache(std::make_unique<RoleCache>(*this, 600)),
row_policy_cache(std::make_unique<RowPolicyCache>(*this)),
quota_cache(std::make_unique<QuotaCache>(*this)),
settings_profiles_cache(std::make_unique<SettingsProfilesCache>(*this)),
@ -282,6 +282,8 @@ void AccessControl::setUpFromMainConfig(const Poco::Util::AbstractConfiguration
setSettingsConstraintsReplacePrevious(config_.getBool("access_control_improvements.settings_constraints_replace_previous", false));
addStoragesFromMainConfig(config_, config_path_, get_zookeeper_function_);
role_cache = std::make_unique<RoleCache>(*this, config_.getInt("access_control_improvements.role_cache_expiration_time_seconds", 600));
}

View File

@ -56,8 +56,8 @@ namespace
}
RoleCache::RoleCache(const AccessControl & access_control_)
: access_control(access_control_), cache(600000 /* 10 minutes */)
RoleCache::RoleCache(const AccessControl & access_control_, int expiration_time_seconds)
: access_control(access_control_), cache(expiration_time_seconds * 1000 /* 10 minutes by default*/)
{
}

View File

@ -16,7 +16,7 @@ using RolePtr = std::shared_ptr<const Role>;
class RoleCache
{
public:
explicit RoleCache(const AccessControl & access_control_);
explicit RoleCache(const AccessControl & access_control_, int expiration_time_seconds);
~RoleCache();
std::shared_ptr<const EnabledRoles> getEnabledRoles(

View File

@ -111,7 +111,6 @@ namespace ErrorCodes
extern const int NOT_IMPLEMENTED;
extern const int ALIAS_REQUIRED;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int ILLEGAL_PREWHERE;
extern const int UNKNOWN_TABLE;
}
@ -1578,41 +1577,20 @@ void QueryAnalyzer::collectCompoundExpressionValidIdentifiersForTypoCorrection(
const Identifier & valid_identifier_prefix,
std::unordered_set<Identifier> & valid_identifiers_result)
{
std::vector<std::pair<Identifier, const IDataType *>> identifiers_with_types_to_process;
identifiers_with_types_to_process.emplace_back(valid_identifier_prefix, compound_expression_type.get());
while (!identifiers_with_types_to_process.empty())
IDataType::forEachSubcolumn([&](const auto &, const auto & name, const auto &)
{
auto [identifier, type] = identifiers_with_types_to_process.back();
identifiers_with_types_to_process.pop_back();
Identifier subcolumn_indentifier(name);
size_t new_identifier_size = valid_identifier_prefix.getPartsSize() + subcolumn_indentifier.getPartsSize();
if (identifier.getPartsSize() + 1 > unresolved_identifier.getPartsSize())
continue;
while (const DataTypeArray * array = checkAndGetDataType<DataTypeArray>(type))
type = array->getNestedType().get();
const DataTypeTuple * tuple = checkAndGetDataType<DataTypeTuple>(type);
if (!tuple)
continue;
const auto & tuple_element_names = tuple->getElementNames();
size_t tuple_element_names_size = tuple_element_names.size();
for (size_t i = 0; i < tuple_element_names_size; ++i)
if (new_identifier_size == unresolved_identifier.getPartsSize())
{
const auto & element_name = tuple_element_names[i];
const auto & element_type = tuple->getElements()[i];
auto new_identifier = valid_identifier_prefix;
for (const auto & part : subcolumn_indentifier)
new_identifier.push_back(part);
identifier.push_back(element_name);
valid_identifiers_result.insert(identifier);
identifiers_with_types_to_process.emplace_back(identifier, element_type.get());
identifier.pop_back();
valid_identifiers_result.insert(std::move(new_identifier));
}
}
}, ISerialization::SubstreamData(compound_expression_type->getDefaultSerialization()));
}
/// Get valid identifiers for typo correction from table expression
@ -2374,7 +2352,7 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromCompoundExpression(const
auto expression_type = compound_expression->getResultType();
if (!nestedIdentifierCanBeResolved(expression_type, nested_path))
if (!expression_type->hasSubcolumn(nested_path.getFullName()))
{
std::unordered_set<Identifier> valid_identifiers;
collectCompoundExpressionValidIdentifiersForTypoCorrection(expression_identifier,
@ -2401,10 +2379,15 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromCompoundExpression(const
getHintsErrorMessageSuffix(hints));
}
auto tuple_element_result = wrapExpressionNodeInTupleElement(compound_expression, nested_path);
resolveFunction(tuple_element_result, scope);
QueryTreeNodePtr get_subcolumn_function = std::make_shared<FunctionNode>("getSubcolumn");
auto & get_subcolumn_function_arguments_nodes = get_subcolumn_function->as<FunctionNode>()->getArguments().getNodes();
return tuple_element_result;
get_subcolumn_function_arguments_nodes.reserve(2);
get_subcolumn_function_arguments_nodes.push_back(compound_expression);
get_subcolumn_function_arguments_nodes.push_back(std::make_shared<ConstantNode>(nested_path.getFullName()));
resolveFunction(get_subcolumn_function, scope);
return get_subcolumn_function;
}
/** Resolve identifier from expression arguments.
@ -3708,8 +3691,15 @@ QueryAnalyzer::QueryTreeNodesWithNames QueryAnalyzer::resolveQualifiedMatcher(Qu
{
auto result_type = expression_query_tree_node->getResultType();
while (const auto * array_type = typeid_cast<const DataTypeArray *>(result_type.get()))
result_type = array_type->getNestedType();
while (true)
{
if (const auto * array_type = typeid_cast<const DataTypeArray *>(result_type.get()))
result_type = array_type->getNestedType();
else if (const auto * map_type = typeid_cast<const DataTypeMap *>(result_type.get()))
result_type = map_type->getNestedType();
else
break;
}
const auto * tuple_data_type = typeid_cast<const DataTypeTuple *>(result_type.get());
if (!tuple_data_type)
@ -3729,11 +3719,11 @@ QueryAnalyzer::QueryTreeNodesWithNames QueryAnalyzer::resolveQualifiedMatcher(Qu
if (!matcher_node_typed.isMatchingColumn(element_name))
continue;
auto tuple_element_function = std::make_shared<FunctionNode>("tupleElement");
tuple_element_function->getArguments().getNodes().push_back(expression_query_tree_node);
tuple_element_function->getArguments().getNodes().push_back(std::make_shared<ConstantNode>(element_name));
auto get_subcolumn_function = std::make_shared<FunctionNode>("getSubcolumn");
get_subcolumn_function->getArguments().getNodes().push_back(expression_query_tree_node);
get_subcolumn_function->getArguments().getNodes().push_back(std::make_shared<ConstantNode>(element_name));
QueryTreeNodePtr function_query_node = tuple_element_function;
QueryTreeNodePtr function_query_node = get_subcolumn_function;
resolveFunction(function_query_node, scope);
qualified_matcher_element_identifier.push_back(element_name);
@ -6865,13 +6855,7 @@ void QueryAnalyzer::resolveQuery(const QueryTreeNodePtr & query_node, Identifier
if (query_node_typed.isGroupByAll())
expandGroupByAll(query_node_typed);
if (query_node_typed.hasPrewhere())
assertNoFunctionNodes(query_node_typed.getPrewhere(),
"arrayJoin",
ErrorCodes::ILLEGAL_PREWHERE,
"ARRAY JOIN",
"in PREWHERE");
validateFilters(query_node);
validateAggregates(query_node, { .group_by_use_nulls = scope.group_by_use_nulls });
for (const auto & column : projection_columns)

View File

@ -472,30 +472,6 @@ QueryTreeNodes buildTableExpressionsStack(const QueryTreeNodePtr & join_tree_nod
return result;
}
bool nestedIdentifierCanBeResolved(const DataTypePtr & compound_type, IdentifierView nested_identifier)
{
const IDataType * current_type = compound_type.get();
for (const auto & identifier_part : nested_identifier)
{
while (const DataTypeArray * array = checkAndGetDataType<DataTypeArray>(current_type))
current_type = array->getNestedType().get();
const DataTypeTuple * tuple = checkAndGetDataType<DataTypeTuple>(current_type);
if (!tuple)
return false;
auto position = tuple->tryGetPositionByName(identifier_part);
if (!position)
return false;
current_type = tuple->getElements()[*position].get();
}
return true;
}
namespace
{

View File

@ -60,14 +60,6 @@ QueryTreeNodePtr extractLeftTableExpression(const QueryTreeNodePtr & join_tree_n
*/
QueryTreeNodes buildTableExpressionsStack(const QueryTreeNodePtr & join_tree_node);
/** Returns true if nested identifier can be resolved from compound type.
* Compound type can be tuple or array of tuples.
*
* Example: Compound type: Tuple(nested_path Tuple(nested_path_2 UInt64)). Nested identifier: nested_path_1.nested_path_2.
* Result: true.
*/
bool nestedIdentifierCanBeResolved(const DataTypePtr & compound_type, IdentifierView nested_identifier);
/** Assert that there are no function nodes with specified function name in node children.
* Do not visit subqueries.
*/

View File

@ -17,8 +17,50 @@ namespace ErrorCodes
extern const int NOT_AN_AGGREGATE;
extern const int NOT_IMPLEMENTED;
extern const int BAD_ARGUMENTS;
extern const int ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER;
extern const int ILLEGAL_PREWHERE;
}
namespace
{
void validateFilter(const QueryTreeNodePtr & filter_node, std::string_view exception_place_message, const QueryTreeNodePtr & query_node)
{
auto filter_node_result_type = filter_node->getResultType();
if (!filter_node_result_type->canBeUsedInBooleanContext())
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER,
"Invalid type for filter in {}: {}. In query {}",
exception_place_message,
filter_node_result_type->getName(),
query_node->formatASTForErrorMessage());
}
}
void validateFilters(const QueryTreeNodePtr & query_node)
{
const auto & query_node_typed = query_node->as<QueryNode &>();
if (query_node_typed.hasPrewhere())
{
validateFilter(query_node_typed.getPrewhere(), "PREWHERE", query_node);
assertNoFunctionNodes(query_node_typed.getPrewhere(),
"arrayJoin",
ErrorCodes::ILLEGAL_PREWHERE,
"ARRAY JOIN",
"in PREWHERE");
}
if (query_node_typed.hasWhere())
validateFilter(query_node_typed.getWhere(), "WHERE", query_node);
if (query_node_typed.hasHaving())
validateFilter(query_node_typed.getHaving(), "HAVING", query_node);
}
namespace
{
class ValidateGroupByColumnsVisitor : public ConstInDepthQueryTreeVisitor<ValidateGroupByColumnsVisitor>
{
public:
@ -106,7 +148,9 @@ private:
const QueryTreeNodePtr & query_node;
};
void validateAggregates(const QueryTreeNodePtr & query_node, ValidationParams params)
}
void validateAggregates(const QueryTreeNodePtr & query_node, AggregatesValidationParams params)
{
const auto & query_node_typed = query_node->as<QueryNode &>();
auto join_tree_node_type = query_node_typed.getJoinTree()->getNodeType();

View File

@ -5,7 +5,10 @@
namespace DB
{
struct ValidationParams
/// Validate PREWHERE, WHERE, HAVING in query node
void validateFilters(const QueryTreeNodePtr & query_node);
struct AggregatesValidationParams
{
bool group_by_use_nulls = false;
};
@ -20,7 +23,7 @@ struct ValidationParams
* PROJECTION.
* 5. Throws exception if there is GROUPING SETS or ROLLUP or CUBE or WITH TOTALS without aggregation.
*/
void validateAggregates(const QueryTreeNodePtr & query_node, ValidationParams params);
void validateAggregates(const QueryTreeNodePtr & query_node, AggregatesValidationParams params);
/** Assert that there are no function nodes with specified function name in node children.
* Do not visit subqueries.

View File

@ -441,7 +441,7 @@ OperationID BackupsWorker::startRestoring(const ASTPtr & query, ContextMutablePt
if (restore_settings.async)
{
backups_thread_pool.scheduleOrThrowOnError(
restores_thread_pool.scheduleOrThrowOnError(
[this, restore_query, restore_id, backup_name_for_logging, backup_info, restore_settings, restore_coordination, context_in_use]
{
doRestore(

View File

@ -261,21 +261,31 @@ static void incrementProfileEventsBlock(Block & dst, const Block & src)
}
std::atomic_flag exit_on_signal;
std::atomic<Int32> exit_after_signals = 0;
class QueryInterruptHandler : private boost::noncopyable
{
public:
static void start() { exit_on_signal.clear(); }
/// Store how many interrupt signals can be received before stopping the query
/// (by default, stop after the first interrupt signal).
static void start(Int32 signals_before_stop = 1) { exit_after_signals.store(signals_before_stop); }
/// Set value not greater than 0 to mark the query as stopped.
static void stop() { return exit_after_signals.store(0); }
/// Return true if the query was stopped.
static bool stop() { return exit_on_signal.test_and_set(); }
static bool cancelled() { return exit_on_signal.test(); }
/// Query was stopped if it received at least "signals_before_stop" interrupt signals.
static bool try_stop() { return exit_after_signals.fetch_sub(1) <= 0; }
static bool cancelled() { return exit_after_signals.load() <= 0; }
/// Return how many interrupt signals remain before stop.
static Int32 cancelled_status() { return exit_after_signals.load(); }
};
/// This signal handler is set only for SIGINT.
void interruptSignalHandler(int signum)
{
if (QueryInterruptHandler::stop())
if (QueryInterruptHandler::try_stop())
safeExit(128 + signum);
}
@ -850,12 +860,15 @@ void ClientBase::processOrdinaryQuery(const String & query_to_execute, ASTPtr pa
}
}
const auto & settings = global_context->getSettingsRef();
const Int32 signals_before_stop = settings.stop_reading_on_first_cancel ? 2 : 1;
int retries_left = 10;
while (retries_left)
{
try
{
QueryInterruptHandler::start();
QueryInterruptHandler::start(signals_before_stop);
SCOPE_EXIT({ QueryInterruptHandler::stop(); });
connection->sendQuery(
@ -872,7 +885,7 @@ void ClientBase::processOrdinaryQuery(const String & query_to_execute, ASTPtr pa
if (send_external_tables)
sendExternalTables(parsed_query);
receiveResult(parsed_query);
receiveResult(parsed_query, signals_before_stop, settings.stop_reading_on_first_cancel);
break;
}
@ -897,7 +910,7 @@ void ClientBase::processOrdinaryQuery(const String & query_to_execute, ASTPtr pa
/// Receives and processes packets coming from server.
/// Also checks if query execution should be cancelled.
void ClientBase::receiveResult(ASTPtr parsed_query)
void ClientBase::receiveResult(ASTPtr parsed_query, Int32 signals_before_stop, bool stop_reading_on_first_cancel)
{
// TODO: get the poll_interval from commandline.
const auto receive_timeout = connection_parameters.timeouts.receive_timeout;
@ -921,7 +934,13 @@ void ClientBase::receiveResult(ASTPtr parsed_query)
/// to avoid losing sync.
if (!cancelled)
{
if (QueryInterruptHandler::cancelled())
if (stop_reading_on_first_cancel && QueryInterruptHandler::cancelled_status() == signals_before_stop - 1)
{
connection->sendCancel();
/// First cancel reading request was sent. Next requests will only be with a full cancel
stop_reading_on_first_cancel = false;
}
else if (QueryInterruptHandler::cancelled())
{
cancelQuery();
}

View File

@ -131,7 +131,7 @@ protected:
private:
void receiveResult(ASTPtr parsed_query);
void receiveResult(ASTPtr parsed_query, Int32 signals_before_stop, bool stop_reading_on_first_cancel);
bool receiveAndProcessPacket(ASTPtr parsed_query, bool cancelled_);
void receiveLogsAndProfileEvents(ASTPtr parsed_query);
bool receiveSampleBlock(Block & out, ColumnsDescription & columns_description, ASTPtr parsed_query);

View File

@ -349,12 +349,14 @@ The server successfully detected this situation and will download merged part fr
M(DiskS3PutObject, "Number of DiskS3 API PutObject calls.") \
M(DiskS3GetObject, "Number of DiskS3 API GetObject calls.") \
\
M(ReadBufferFromS3Microseconds, "Time spend in reading from S3.") \
M(ReadBufferFromS3InitMicroseconds, "Time spend initializing connection to S3.") \
M(ReadBufferFromS3Microseconds, "Time spent on reading from S3.") \
M(ReadBufferFromS3InitMicroseconds, "Time spent initializing connection to S3.") \
M(ReadBufferFromS3Bytes, "Bytes read from S3.") \
M(ReadBufferFromS3RequestsErrors, "Number of exceptions while reading from S3.") \
\
M(WriteBufferFromS3Microseconds, "Time spent on writing to S3.") \
M(WriteBufferFromS3Bytes, "Bytes written to S3.") \
M(WriteBufferFromS3RequestsErrors, "Number of exceptions while writing to S3.") \
\
M(QueryMemoryLimitExceeded, "Number of times when memory limit exceeded for query.") \
\

View File

@ -46,8 +46,8 @@ ThreadPoolImpl<Thread>::ThreadPoolImpl(size_t max_threads_)
template <typename Thread>
ThreadPoolImpl<Thread>::ThreadPoolImpl(size_t max_threads_, size_t max_free_threads_, size_t queue_size_, bool shutdown_on_exception_)
: max_threads(max_threads_)
, max_free_threads(max_free_threads_)
, queue_size(queue_size_)
, max_free_threads(std::min(max_free_threads_, max_threads))
, queue_size(queue_size_ ? std::max(queue_size_, max_threads) : 0 /* zero means the queue is unlimited */)
, shutdown_on_exception(shutdown_on_exception_)
{
}
@ -56,10 +56,26 @@ template <typename Thread>
void ThreadPoolImpl<Thread>::setMaxThreads(size_t value)
{
std::lock_guard lock(mutex);
bool need_start_threads = (value > max_threads);
bool need_finish_free_threads = (value < max_free_threads);
max_threads = value;
max_free_threads = std::min(max_free_threads, max_threads);
/// We have to also adjust queue size, because it limits the number of scheduled and already running jobs in total.
queue_size = std::max(queue_size, max_threads);
queue_size = queue_size ? std::max(queue_size, max_threads) : 0;
jobs.reserve(queue_size);
if (need_start_threads)
{
/// Start new threads while there are more scheduled jobs in the queue and the limit `max_threads` is not reached.
startNewThreadsNoLock();
}
else if (need_finish_free_threads)
{
/// Wake up free threads so they can finish themselves.
new_job_or_shutdown.notify_all();
}
}
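The sizing rules that the constructor and setMaxThreads above maintain, shown in a hedged standalone form (a hypothetical PoolLimits struct, not ThreadPoolImpl itself): max_free_threads never exceeds max_threads, and a non-zero queue_size is raised to at least max_threads because it limits scheduled plus running jobs in total, while zero keeps the queue unlimited.
#include <algorithm>
#include <cassert>
#include <cstddef>

struct PoolLimits
{
    size_t max_threads;
    size_t max_free_threads;
    size_t queue_size;

    void setMaxThreads(size_t value)
    {
        max_threads = value;
        max_free_threads = std::min(max_free_threads, max_threads);
        queue_size = queue_size ? std::max(queue_size, max_threads) : 0;
    }
};

int main()
{
    PoolLimits limits{/*max_threads*/ 8, /*max_free_threads*/ 16, /*queue_size*/ 4};
    limits.setMaxThreads(10);
    assert(limits.max_free_threads == 10);  // clamped to the new max_threads
    assert(limits.queue_size == 10);        // raised to max_threads, since it was non-zero
}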
template <typename Thread>
@ -73,14 +89,22 @@ template <typename Thread>
void ThreadPoolImpl<Thread>::setMaxFreeThreads(size_t value)
{
std::lock_guard lock(mutex);
max_free_threads = value;
bool need_finish_free_threads = (value < max_free_threads);
max_free_threads = std::min(value, max_threads);
if (need_finish_free_threads)
{
/// Wake up free threads so they can finish themselves.
new_job_or_shutdown.notify_all();
}
}
template <typename Thread>
void ThreadPoolImpl<Thread>::setQueueSize(size_t value)
{
std::lock_guard lock(mutex);
queue_size = value;
queue_size = value ? std::max(value, max_threads) : 0;
/// Reserve memory to get rid of allocations
jobs.reserve(queue_size);
}
@ -159,11 +183,42 @@ ReturnType ThreadPoolImpl<Thread>::scheduleImpl(Job job, ssize_t priority, std::
++scheduled_jobs;
}
/// Wake up a free thread to run the new job.
new_job_or_shutdown.notify_one();
return static_cast<ReturnType>(true);
}
template <typename Thread>
void ThreadPoolImpl<Thread>::startNewThreadsNoLock()
{
if (shutdown)
return;
/// Start new threads while there are more scheduled jobs in the queue and the limit `max_threads` is not reached.
while (threads.size() < std::min(scheduled_jobs, max_threads))
{
try
{
threads.emplace_front();
}
catch (...)
{
break; /// failed to start more threads
}
try
{
threads.front() = Thread([this, it = threads.begin()] { worker(it); });
}
catch (...)
{
threads.pop_front();
break; /// failed to start more threads
}
}
}
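The exception-safety idiom used in startNewThreadsNoLock, shown as a hedged standalone sketch (std::list<std::thread> instead of the pool's thread type): reserve the list slot first, construct the thread into it, and roll the slot back if construction throws.
#include <cstddef>
#include <list>
#include <thread>

static void start_threads(std::list<std::thread> & threads, std::size_t wanted)
{
    while (threads.size() < wanted)
    {
        try
        {
            threads.emplace_front();               // reserve a slot; may throw std::bad_alloc
        }
        catch (...)
        {
            break;                                 // could not even get a slot
        }
        try
        {
            threads.front() = std::thread([]{});   // construct the worker into the slot
        }
        catch (...)
        {
            threads.pop_front();                   // roll back the empty slot
            break;
        }
    }
}

int main()
{
    std::list<std::thread> threads;
    start_threads(threads, 2);
    for (auto & t : threads)
        t.join();
}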
template <typename Thread>
void ThreadPoolImpl<Thread>::scheduleOrThrowOnError(Job job, ssize_t priority)
{
@ -185,20 +240,18 @@ void ThreadPoolImpl<Thread>::scheduleOrThrow(Job job, ssize_t priority, uint64_t
template <typename Thread>
void ThreadPoolImpl<Thread>::wait()
{
{
std::unique_lock lock(mutex);
/// Signal here just in case.
/// If threads are waiting on condition variables, but there are some jobs in the queue
/// then it will prevent us from deadlock.
new_job_or_shutdown.notify_all();
job_finished.wait(lock, [this] { return scheduled_jobs == 0; });
std::unique_lock lock(mutex);
/// Signal here just in case.
/// If threads are waiting on condition variables, but there are some jobs in the queue
/// then it will prevent us from deadlock.
new_job_or_shutdown.notify_all();
job_finished.wait(lock, [this] { return scheduled_jobs == 0; });
if (first_exception)
{
std::exception_ptr exception;
std::swap(exception, first_exception);
std::rethrow_exception(exception);
}
if (first_exception)
{
std::exception_ptr exception;
std::swap(exception, first_exception);
std::rethrow_exception(exception);
}
}
@ -219,10 +272,14 @@ void ThreadPoolImpl<Thread>::finalize()
{
std::lock_guard lock(mutex);
shutdown = true;
/// We don't want threads to remove themselves from `threads` anymore, otherwise `thread.join()` will go wrong below in this function.
threads_remove_themselves = false;
}
/// Wake up threads so they can finish themselves.
new_job_or_shutdown.notify_all();
/// Wait for all currently running jobs to finish (we don't wait for all scheduled jobs here like the function wait() does).
for (auto & thread : threads)
thread.join();
@ -268,38 +325,53 @@ void ThreadPoolImpl<Thread>::worker(typename std::list<Thread>::iterator thread_
CurrentMetrics::Increment metric_all_threads(
std::is_same_v<Thread, std::thread> ? CurrentMetrics::GlobalThread : CurrentMetrics::LocalThread);
/// Remove this thread from `threads` and detach it; this must be done before exiting from this worker.
/// We can't wrap the following lambda function into `SCOPE_EXIT` because it requires `mutex` to be locked.
auto detach_thread = [this, thread_it]
{
/// `mutex` is supposed to be already locked.
if (threads_remove_themselves)
{
thread_it->detach();
threads.erase(thread_it);
}
};
/// We'll run jobs in this worker while there are scheduled jobs and until some special event occurs (e.g. shutdown, or decreasing the number of max_threads).
/// And if `max_free_threads > 0` we keep this number of threads even when there are no jobs for them currently.
while (true)
{
/// This is inside the loop to also reset previous thread names set inside the jobs.
setThreadName("ThreadPool");
Job job;
bool need_shutdown = false;
/// A copy of parent trace context
DB::OpenTelemetry::TracingContextOnThread parent_thead_trace_context;
/// Get a job from the queue.
Job job;
std::exception_ptr exception_from_job;
bool need_shutdown = false;
{
std::unique_lock lock(mutex);
new_job_or_shutdown.wait(lock, [this] { return shutdown || !jobs.empty(); });
new_job_or_shutdown.wait(lock, [&] { return !jobs.empty() || shutdown || (threads.size() > std::min(max_threads, scheduled_jobs + max_free_threads)); });
need_shutdown = shutdown;
if (!jobs.empty())
if (jobs.empty())
{
/// boost::priority_queue does not provide interface for getting non-const reference to an element
/// to prevent us from modifying its priority. We have to use const_cast to force move semantics on JobWithPriority::job.
job = std::move(const_cast<Job &>(jobs.top().job));
parent_thead_trace_context = std::move(const_cast<DB::OpenTelemetry::TracingContextOnThread &>(jobs.top().thread_trace_context));
jobs.pop();
}
else
{
/// shutdown is true, simply finish the thread.
/// No jobs and either `shutdown` is set or this thread is excessive. The worker will stop.
detach_thread();
return;
}
/// boost::priority_queue does not provide interface for getting non-const reference to an element
/// to prevent us from modifying its priority. We have to use const_cast to force move semantics on JobWithPriority::job.
job = std::move(const_cast<Job &>(jobs.top().job));
parent_thead_trace_context = std::move(const_cast<DB::OpenTelemetry::TracingContextOnThread &>(jobs.top().thread_trace_context));
jobs.pop();
}
/// Run the job. We don't run jobs after `shutdown` is set.
if (!need_shutdown)
{
ALLOW_ALLOCATIONS_IN_SCOPE;
@ -326,46 +398,47 @@ void ThreadPoolImpl<Thread>::worker(typename std::list<Thread>::iterator thread_
/// job should be reset before decrementing scheduled_jobs to
/// ensure that the Job destroyed before wait() returns.
job = {};
parent_thead_trace_context.reset();
}
catch (...)
{
thread_trace_context.root_span.addAttribute(std::current_exception());
exception_from_job = std::current_exception();
thread_trace_context.root_span.addAttribute(exception_from_job);
/// job should be reset before decrementing scheduled_jobs to
/// ensure that the Job destroyed before wait() returns.
job = {};
parent_thead_trace_context.reset();
{
std::lock_guard lock(mutex);
if (!first_exception)
first_exception = std::current_exception(); // NOLINT
if (shutdown_on_exception)
shutdown = true;
--scheduled_jobs;
}
job_finished.notify_all();
new_job_or_shutdown.notify_all();
return;
}
parent_thead_trace_context.reset();
}
/// The job is done.
{
std::lock_guard lock(mutex);
if (exception_from_job)
{
if (!first_exception)
first_exception = exception_from_job;
if (shutdown_on_exception)
shutdown = true;
}
--scheduled_jobs;
if (threads.size() > scheduled_jobs + max_free_threads)
if (threads.size() > std::min(max_threads, scheduled_jobs + max_free_threads))
{
thread_it->detach();
threads.erase(thread_it);
/// This thread is excessive. The worker will stop.
detach_thread();
job_finished.notify_all();
if (shutdown)
new_job_or_shutdown.notify_all(); /// `shutdown` was set, wake up other threads so they can finish themselves.
return;
}
}
job_finished.notify_all();
job_finished.notify_all();
if (shutdown)
new_job_or_shutdown.notify_all(); /// `shutdown` was set, wake up other threads so they can finish themselves.
}
}
}
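A numeric illustration of the "excessive thread" predicate used in the worker loop above (plain arithmetic in a standalone sketch, not the pool itself): a worker retires either because max_threads was lowered or because more workers are idle than max_free_threads allows.
#include <algorithm>
#include <cassert>
#include <cstddef>

static bool is_excessive(size_t threads, size_t max_threads, size_t scheduled_jobs, size_t max_free_threads)
{
    return threads > std::min(max_threads, scheduled_jobs + max_free_threads);
}

int main()
{
    assert(is_excessive(8, 4, 8, 2));    // max_threads was lowered from 8 to 4
    assert(is_excessive(8, 16, 1, 2));   // one job left, keep at most 1 + 2 = 3 workers
    assert(!is_excessive(8, 16, 6, 2));  // 6 + 2 = 8 workers are still within the limit
}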

View File

@ -102,6 +102,7 @@ private:
size_t scheduled_jobs = 0;
bool shutdown = false;
bool threads_remove_themselves = true;
const bool shutdown_on_exception = true;
struct JobWithPriority
@ -129,6 +130,9 @@ private:
void worker(typename std::list<Thread>::iterator thread_it);
/// Tries to start new threads if there are scheduled jobs and the limit `max_threads` is not reached. Must be called with `mutex` locked.
void startNewThreadsNoLock();
void finalize();
void onDestroy();
};
@ -260,6 +264,11 @@ public:
return true;
}
std::thread::id get_id() const
{
return state ? state->thread_id.load() : std::thread::id{};
}
protected:
struct State
{

View File

@ -24,6 +24,8 @@ namespace DB
M(UInt64, max_backups_io_thread_pool_size, 1000, "The maximum number of threads that would be used for IO operations for BACKUP queries", 0) \
M(UInt64, max_backups_io_thread_pool_free_size, 0, "Max free size for backups IO thread pool.", 0) \
M(UInt64, backups_io_thread_pool_queue_size, 0, "Queue size for backups IO thread pool.", 0) \
M(UInt64, backup_threads, 16, "The maximum number of threads to execute BACKUP requests.", 0) \
M(UInt64, restore_threads, 16, "The maximum number of threads to execute RESTORE requests.", 0) \
M(Int32, max_connections, 1024, "Max server connections.", 0) \
M(UInt32, asynchronous_metrics_update_period_s, 1, "Period in seconds for updating asynchronous metrics.", 0) \
M(UInt32, asynchronous_heavy_metrics_update_period_s, 120, "Period in seconds for updating heavy asynchronous metrics.", 0) \

View File

@ -44,7 +44,7 @@ class IColumn;
M(UInt64, max_joined_block_size_rows, DEFAULT_BLOCK_SIZE, "Maximum block size for JOIN result (if join algorithm supports it). 0 means unlimited.", 0) \
M(UInt64, max_insert_threads, 0, "The maximum number of threads to execute the INSERT SELECT query. Values 0 or 1 mean that INSERT SELECT is not run in parallel. Higher values will lead to higher memory usage. Parallel INSERT SELECT has an effect only if the SELECT part is run in parallel, see 'max_threads' setting.", 0) \
M(UInt64, max_insert_delayed_streams_for_parallel_write, 0, "The maximum number of streams (columns) to delay final part flush. Default - auto (1000 in case of underlying storage supports parallel write, for example S3 and disabled otherwise)", 0) \
M(UInt64, max_final_threads, 16, "The maximum number of threads to read from table with FINAL.", 0) \
M(MaxThreads, max_final_threads, 0, "The maximum number of threads to read from table with FINAL.", 0) \
M(MaxThreads, max_threads, 0, "The maximum number of threads to execute the request. By default, it is determined automatically.", 0) \
M(MaxThreads, max_download_threads, 4, "The maximum number of threads to download data (e.g. for URL engine).", 0) \
M(UInt64, max_download_buffer_size, 10*1024*1024, "The maximal size of buffer for parallel downloading (e.g. for URL engine) per each thread.", 0) \
@ -152,6 +152,7 @@ class IColumn;
M(String, cluster_for_parallel_replicas, "default", "Cluster for a shard in which current server is located", 0) \
M(Bool, allow_experimental_parallel_reading_from_replicas, false, "If true, ClickHouse will send a SELECT query to all replicas of a table. It will work for any kind of MergeTree table.", 0) \
M(Float, parallel_replicas_single_task_marks_count_multiplier, 2, "A multiplier which will be added during calculation for minimal number of marks to retrieve from coordinator. This will be applied only for remote replicas.", 0) \
M(Bool, parallel_replicas_for_non_replicated_merge_tree, false, "If true, ClickHouse will use parallel replicas algorithm also for non-replicated MergeTree tables", 0) \
\
M(Bool, skip_unavailable_shards, false, "If true, ClickHouse silently skips unavailable shards and nodes unresolvable through DNS. Shard is marked as unavailable when none of the replicas can be reached.", 0) \
\
@ -280,6 +281,7 @@ class IColumn;
\
M(Bool, final, false, "Query with the FINAL modifier by default. If the engine does not support final, it does not have any effect. On queries with multiple tables final is applied only on those that support it. It also works on distributed tables", 0) \
\
M(Bool, stop_reading_on_first_cancel, false, "Allows the query to return a partial result after cancellation.", 0) \
/** Settings for testing hedged requests */ \
M(Milliseconds, sleep_in_send_tables_status_ms, 0, "Time to sleep in sending tables status response in TCPHandler", 0) \
M(Milliseconds, sleep_in_send_data_ms, 0, "Time to sleep in sending data in TCPHandler", 0) \
@ -413,8 +415,6 @@ class IColumn;
M(UInt64, max_temporary_data_on_disk_size_for_user, 0, "The maximum amount of data consumed by temporary files on disk in bytes for all concurrently running user queries. Zero means unlimited.", 0)\
M(UInt64, max_temporary_data_on_disk_size_for_query, 0, "The maximum amount of data consumed by temporary files on disk in bytes for all concurrently running queries. Zero means unlimited.", 0)\
\
M(UInt64, backup_threads, 16, "The maximum number of threads to execute BACKUP requests.", 0) \
M(UInt64, restore_threads, 16, "The maximum number of threads to execute RESTORE requests.", 0) \
M(UInt64, backup_keeper_max_retries, 20, "Max retries for keeper operations during backup", 0) \
M(UInt64, backup_keeper_retry_initial_backoff_ms, 100, "Initial backoff timeout for [Zoo]Keeper operations during backup", 0) \
M(UInt64, backup_keeper_retry_max_backoff_ms, 5000, "Max backoff timeout for [Zoo]Keeper operations during backup", 0) \
@ -760,6 +760,8 @@ class IColumn;
MAKE_OBSOLETE(M, Milliseconds, async_insert_cleanup_timeout_ms, 1000) \
MAKE_OBSOLETE(M, Bool, optimize_fuse_sum_count_avg, 0) \
MAKE_OBSOLETE(M, Seconds, drain_timeout, 3) \
MAKE_OBSOLETE(M, UInt64, backup_threads, 16) \
MAKE_OBSOLETE(M, UInt64, restore_threads, 16) \
/** The section above is for obsolete settings. Do not add anything there. */

View File

@ -102,7 +102,7 @@ void IDataType::forEachSubcolumn(
template <typename Ptr>
Ptr IDataType::getForSubcolumn(
const String & subcolumn_name,
std::string_view subcolumn_name,
const SubstreamData & data,
Ptr SubstreamData::*member,
bool throw_if_null) const
@ -120,36 +120,36 @@ Ptr IDataType::getForSubcolumn(
return res;
}
bool IDataType::hasSubcolumn(const String & subcolumn_name) const
bool IDataType::hasSubcolumn(std::string_view subcolumn_name) const
{
return tryGetSubcolumnType(subcolumn_name) != nullptr;
}
DataTypePtr IDataType::tryGetSubcolumnType(const String & subcolumn_name) const
DataTypePtr IDataType::tryGetSubcolumnType(std::string_view subcolumn_name) const
{
auto data = SubstreamData(getDefaultSerialization()).withType(getPtr());
return getForSubcolumn<DataTypePtr>(subcolumn_name, data, &SubstreamData::type, false);
}
DataTypePtr IDataType::getSubcolumnType(const String & subcolumn_name) const
DataTypePtr IDataType::getSubcolumnType(std::string_view subcolumn_name) const
{
auto data = SubstreamData(getDefaultSerialization()).withType(getPtr());
return getForSubcolumn<DataTypePtr>(subcolumn_name, data, &SubstreamData::type, true);
}
ColumnPtr IDataType::tryGetSubcolumn(const String & subcolumn_name, const ColumnPtr & column) const
ColumnPtr IDataType::tryGetSubcolumn(std::string_view subcolumn_name, const ColumnPtr & column) const
{
auto data = SubstreamData(getDefaultSerialization()).withColumn(column);
return getForSubcolumn<ColumnPtr>(subcolumn_name, data, &SubstreamData::column, false);
}
ColumnPtr IDataType::getSubcolumn(const String & subcolumn_name, const ColumnPtr & column) const
ColumnPtr IDataType::getSubcolumn(std::string_view subcolumn_name, const ColumnPtr & column) const
{
auto data = SubstreamData(getDefaultSerialization()).withColumn(column);
return getForSubcolumn<ColumnPtr>(subcolumn_name, data, &SubstreamData::column, true);
}
SerializationPtr IDataType::getSubcolumnSerialization(const String & subcolumn_name, const SerializationPtr & serialization) const
SerializationPtr IDataType::getSubcolumnSerialization(std::string_view subcolumn_name, const SerializationPtr & serialization) const
{
auto data = SubstreamData(serialization);
return getForSubcolumn<SerializationPtr>(subcolumn_name, data, &SubstreamData::serialization, true);

View File

@ -79,15 +79,15 @@ public:
/// Data type id. It's used for runtime type checks.
virtual TypeIndex getTypeId() const = 0;
bool hasSubcolumn(const String & subcolumn_name) const;
bool hasSubcolumn(std::string_view subcolumn_name) const;
DataTypePtr tryGetSubcolumnType(const String & subcolumn_name) const;
DataTypePtr getSubcolumnType(const String & subcolumn_name) const;
DataTypePtr tryGetSubcolumnType(std::string_view subcolumn_name) const;
DataTypePtr getSubcolumnType(std::string_view subcolumn_name) const;
ColumnPtr tryGetSubcolumn(const String & subcolumn_name, const ColumnPtr & column) const;
ColumnPtr getSubcolumn(const String & subcolumn_name, const ColumnPtr & column) const;
ColumnPtr tryGetSubcolumn(std::string_view subcolumn_name, const ColumnPtr & column) const;
ColumnPtr getSubcolumn(std::string_view subcolumn_name, const ColumnPtr & column) const;
SerializationPtr getSubcolumnSerialization(const String & subcolumn_name, const SerializationPtr & serialization) const;
SerializationPtr getSubcolumnSerialization(std::string_view subcolumn_name, const SerializationPtr & serialization) const;
using SubstreamData = ISerialization::SubstreamData;
using SubstreamPath = ISerialization::SubstreamPath;
@ -315,7 +315,7 @@ public:
private:
template <typename Ptr>
Ptr getForSubcolumn(
const String & subcolumn_name,
std::string_view subcolumn_name,
const SubstreamData & data,
Ptr SubstreamData::*member,
bool throw_if_null) const;

View File

@ -928,7 +928,16 @@ void DatabaseReplicated::recoverLostReplica(const ZooKeeperPtr & current_zookeep
for (const auto & table_id : tables_to_create)
{
auto table_name = table_id.getTableName();
auto create_query_string = table_name_to_metadata[table_name];
auto metadata_it = table_name_to_metadata.find(table_name);
if (metadata_it == table_name_to_metadata.end())
{
/// getTablesSortedByDependency() may return tables that do not exist or tables from other databases
LOG_WARNING(log, "Got table name {} when resolving table dependencies, "
"but database {} does not have metadata for that table. Ignoring it", table_id.getNameForLogs(), getDatabaseName());
continue;
}
const auto & create_query_string = metadata_it->second;
if (isTableExist(table_name, getContext()))
{
assert(create_query_string == readMetadataFile(table_name));

View File

@ -2,8 +2,10 @@
#include <DataTypes/DataTypeDate.h>
#include <DataTypes/DataTypeDate32.h>
#include <DataTypes/DataTypeDateTime.h>
#include <Functions/IFunction.h>
#include <DataTypes/DataTypeDateTime64.h>
#include <DataTypes/DataTypeLowCardinality.h>
#include <Functions/IFunction.h>
#include <Functions/extractTimeZoneFromFunctionArguments.h>
#include <Functions/DateTimeTransforms.h>
#include <Functions/TransformDateTime64.h>
@ -60,6 +62,9 @@ public:
const auto * type_ptr = &type;
if (const auto * lc_type = checkAndGetDataType<DataTypeLowCardinality>(type_ptr))
type_ptr = lc_type->getDictionaryType().get();
if (const auto * nullable_type = checkAndGetDataType<DataTypeNullable>(type_ptr))
type_ptr = nullable_type->getNestedType().get();

View File

@ -26,24 +26,24 @@ namespace ErrorCodes
extern const int BAD_ARGUMENTS;
}
static String getRootNodeName(UserDefinedSQLObjectType object_type)
namespace
{
switch (object_type)
std::string_view getNodePrefix(UserDefinedSQLObjectType object_type)
{
case UserDefinedSQLObjectType::Function:
return "functions";
switch (object_type)
{
case UserDefinedSQLObjectType::Function:
return "function_";
}
UNREACHABLE();
}
UNREACHABLE();
}
static String getRootNodePath(const String & root_path, UserDefinedSQLObjectType object_type)
{
return root_path + "/" + getRootNodeName(object_type);
}
constexpr std::string_view sql_extension = ".sql";
static String getNodePath(const String & root_path, UserDefinedSQLObjectType object_type, const String & object_name)
{
return getRootNodePath(root_path, object_type) + "/" + escapeForFileName(object_name);
String getNodePath(const String & root_path, UserDefinedSQLObjectType object_type, const String & object_name)
{
return root_path + "/" + String{getNodePrefix(object_type)} + escapeForFileName(object_name) + String{sql_extension};
}
}
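A minimal sketch of the new flat naming scheme for user-defined objects in ZooKeeper (the root path and the escaping stub below are hypothetical; the real code uses escapeForFileName and the configured zookeeper_path):
#include <cassert>
#include <string>

static std::string escape_stub(const std::string & name) { return name; }   // stand-in for escapeForFileName

// One flat directory, nodes named "<prefix><escaped_name>.sql" instead of a per-type subdirectory.
static std::string node_path(const std::string & root, const std::string & object_name)
{
    return root + "/" + "function_" + escape_stub(object_name) + ".sql";
}

int main()
{
    assert(node_path("/clickhouse/user_defined", "linear_equation")
           == "/clickhouse/user_defined/function_linear_equation.sql");
}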
@ -119,10 +119,20 @@ void UserDefinedSQLObjectsLoaderFromZooKeeper::resetAfterError()
void UserDefinedSQLObjectsLoaderFromZooKeeper::loadObjects()
{
/// loadObjects() is called at startup from Server::main(), so it's better not to fail here if there is no connection to ZooKeeper or any other error occurs.
/// However, the watching thread must be started anyway in case the connection is established later.
if (!objects_loaded)
{
reloadObjects();
try
{
reloadObjects();
}
catch (...)
{
tryLogCurrentException(log, "Failed to load user-defined objects");
}
}
startWatchingThread();
}
@ -188,7 +198,6 @@ void UserDefinedSQLObjectsLoaderFromZooKeeper::createRootNodes(const zkutil::Zoo
{
zookeeper->createAncestors(zookeeper_path);
zookeeper->createIfNotExists(zookeeper_path, "");
zookeeper->createIfNotExists(zookeeper_path + "/functions", "");
}
bool UserDefinedSQLObjectsLoaderFromZooKeeper::storeObject(
@ -344,17 +353,19 @@ Strings UserDefinedSQLObjectsLoaderFromZooKeeper::getObjectNamesAndSetWatch(
};
Coordination::Stat stat;
const auto path = getRootNodePath(zookeeper_path, object_type);
const auto node_names = zookeeper->getChildrenWatch(path, &stat, object_list_watcher);
const auto node_names = zookeeper->getChildrenWatch(zookeeper_path, &stat, object_list_watcher);
const auto prefix = getNodePrefix(object_type);
Strings object_names;
object_names.reserve(node_names.size());
for (const auto & node_name : node_names)
{
String object_name = unescapeForFileName(node_name);
if (!object_name.empty())
object_names.push_back(std::move(object_name));
if (node_name.starts_with(prefix) && node_name.ends_with(sql_extension))
{
String object_name = unescapeForFileName(node_name.substr(prefix.length(), node_name.length() - prefix.length() - sql_extension.length()));
if (!object_name.empty())
object_names.push_back(std::move(object_name));
}
}
return object_names;

View File

@ -1179,12 +1179,15 @@ ColumnPtr FunctionArrayElement::perform(const ColumnsWithTypeAndName & arguments
|| (res = executeArgument<Int16>(arguments, result_type, builder, input_rows_count))
|| (res = executeArgument<Int32>(arguments, result_type, builder, input_rows_count))
|| (res = executeArgument<Int64>(arguments, result_type, builder, input_rows_count))))
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Second argument for function {} must have UInt or Int type.", getName());
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Second argument for function {} must have UInt or Int type", getName());
}
else
{
Field index = (*arguments[1].column)[0];
if (index.getType() != Field::Types::UInt64 && index.getType() != Field::Types::Int64)
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Second argument for function {} must have UInt or Int type", getName());
if (builder)
builder.initSink(input_rows_count);

View File

@ -13,6 +13,7 @@
#include <Functions/IFunction.h>
#include <Functions/castTypeToEither.h>
#include <Functions/extractTimeZoneFromFunctionArguments.h>
#include <Functions/numLiteralChars.h>
#include <IO/WriteHelpers.h>
@ -54,55 +55,19 @@ struct FormatDateTimeTraits
};
template <typename DataType> struct ActionValueTypeMap {};
template <> struct ActionValueTypeMap<DataTypeInt8> { using ActionValueType = UInt32; };
template <> struct ActionValueTypeMap<DataTypeUInt8> { using ActionValueType = UInt32; };
template <> struct ActionValueTypeMap<DataTypeInt16> { using ActionValueType = UInt32; };
template <> struct ActionValueTypeMap<DataTypeUInt16> { using ActionValueType = UInt32; };
template <> struct ActionValueTypeMap<DataTypeInt32> { using ActionValueType = UInt32; };
template <> struct ActionValueTypeMap<DataTypeUInt32> { using ActionValueType = UInt32; };
template <> struct ActionValueTypeMap<DataTypeInt64> { using ActionValueType = UInt32; };
template <> struct ActionValueTypeMap<DataTypeUInt64> { using ActionValueType = UInt32; };
template <> struct ActionValueTypeMap<DataTypeDate> { using ActionValueType = UInt16; };
template <> struct ActionValueTypeMap<DataTypeDate32> { using ActionValueType = Int32; };
template <> struct ActionValueTypeMap<DataTypeDateTime> { using ActionValueType = UInt32; };
template <> struct ActionValueTypeMap<DataTypeDateTime64> { using ActionValueType = Int64; };
/// Counts the number of literal characters in Joda format string until the next closing literal
/// sequence single quote. Returns -1 if no literal single quote was found.
/// In Joda format string(https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html)
/// literal content must be quoted with single quote. and two single quote means literal with one single quote.
/// For example:
/// Format string: "'aaaa'", unescaped literal: "aaaa";
/// Format string: "'aa''aa'", unescaped literal: "aa'aa";
/// Format string: "'aaa''aa" is not valid because of missing of end single quote.
Int64 numLiteralChars(const char * cur, const char * end)
{
bool found = false;
Int64 count = 0;
while (cur < end)
{
if (*cur == '\'')
{
if (cur + 1 < end && *(cur + 1) == '\'')
{
count += 2;
cur += 2;
}
else
{
found = true;
break;
}
}
else
{
++count;
++cur;
}
}
return found ? count : -1;
}
template <typename DataType> struct InstructionValueTypeMap {};
template <> struct InstructionValueTypeMap<DataTypeInt8> { using InstructionValueType = UInt32; };
template <> struct InstructionValueTypeMap<DataTypeUInt8> { using InstructionValueType = UInt32; };
template <> struct InstructionValueTypeMap<DataTypeInt16> { using InstructionValueType = UInt32; };
template <> struct InstructionValueTypeMap<DataTypeUInt16> { using InstructionValueType = UInt32; };
template <> struct InstructionValueTypeMap<DataTypeInt32> { using InstructionValueType = UInt32; };
template <> struct InstructionValueTypeMap<DataTypeUInt32> { using InstructionValueType = UInt32; };
template <> struct InstructionValueTypeMap<DataTypeInt64> { using InstructionValueType = UInt32; };
template <> struct InstructionValueTypeMap<DataTypeUInt64> { using InstructionValueType = UInt32; };
template <> struct InstructionValueTypeMap<DataTypeDate> { using InstructionValueType = UInt16; };
template <> struct InstructionValueTypeMap<DataTypeDate32> { using InstructionValueType = Int32; };
template <> struct InstructionValueTypeMap<DataTypeDateTime> { using InstructionValueType = UInt32; };
template <> struct InstructionValueTypeMap<DataTypeDateTime64> { using InstructionValueType = Int64; };
/// Cast value from integer to string, making sure digits number in result string is no less than total_digits by padding leading '0'.
String padValue(UInt32 val, size_t min_digits)
@ -184,7 +149,7 @@ private:
}
template <typename Time>
class Action
class Instruction
{
public:
/// Using std::function will cause performance degradation in MySQL format by 0.45x.
@ -201,8 +166,8 @@ private:
/// extra_shift is only used in MySQL format syntax. It is always 0 in Joda format syntax.
size_t extra_shift = 0;
/// Action for appending date/time related number in specified format.
explicit Action(Func && func_) : func(std::move(func_)) {}
/// Instruction for appending date/time related number in specified format.
explicit Instruction(Func && func_) : func(std::move(func_)) {}
void perform(char *& dest, Time source, UInt64 fractional_second, UInt32 scale, const DateLUTImpl & timezone)
{
@ -825,8 +790,8 @@ public:
if constexpr (std::is_same_v<DataType, DataTypeDateTime64>)
scale = times->getScale();
using T = typename ActionValueTypeMap<DataType>::ActionValueType;
std::vector<Action<T>> instructions;
using T = typename InstructionValueTypeMap<DataType>::InstructionValueType;
std::vector<Instruction<T>> instructions;
String out_template;
auto result_size = parseFormat(format, instructions, scale, out_template);
@ -898,27 +863,25 @@ public:
}
template <typename T>
size_t parseFormat(const String & format, std::vector<Action<T>> & instructions, UInt32 scale, String & out_template) const
size_t parseFormat(const String & format, std::vector<Instruction<T>> & instructions, UInt32 scale, String & out_template) const
{
static_assert(
format_syntax == FormatDateTimeTraits::FormatSyntax::MySQL || format_syntax == FormatDateTimeTraits::FormatSyntax::Joda,
"format syntax must be one of MySQL or Joda");
if constexpr (format_syntax == FormatDateTimeTraits::FormatSyntax::MySQL)
return parseMySQLFormat(format, instructions, scale, out_template);
else if constexpr (format_syntax == FormatDateTimeTraits::FormatSyntax::Joda)
return parseJodaFormat(format, instructions, scale, out_template);
else
throw Exception(
ErrorCodes::NOT_IMPLEMENTED,
"Unknown datetime format style {} in function {}",
magic_enum::enum_name(format_syntax),
getName());
return parseJodaFormat(format, instructions, scale, out_template);
}
template <typename T>
size_t parseMySQLFormat(const String & format, std::vector<Action<T>> & instructions, UInt32 scale, String & out_template) const
size_t parseMySQLFormat(const String & format, std::vector<Instruction<T>> & instructions, UInt32 scale, String & out_template) const
{
auto add_extra_shift = [&](size_t amount)
{
if (instructions.empty())
instructions.emplace_back(&Action<T>::mysqlNoop);
instructions.emplace_back(&Instruction<T>::mysqlNoop);
instructions.back().extra_shift += amount;
};
@ -931,7 +894,7 @@ public:
};
const char * pos = format.data();
const char * const end = pos + format.size();
const char * const end = format.data() + format.size();
while (true)
{
@ -953,43 +916,43 @@ public:
{
// Abbreviated weekday [Mon...Sun]
case 'a':
instructions.emplace_back(&Action<T>::mysqlDayOfWeekTextShort);
instructions.emplace_back(&Instruction<T>::mysqlDayOfWeekTextShort);
out_template += "Mon";
break;
// Abbreviated month [Jan...Dec]
case 'b':
instructions.emplace_back(&Action<T>::mysqlMonthOfYearTextShort);
instructions.emplace_back(&Instruction<T>::mysqlMonthOfYearTextShort);
out_template += "Jan";
break;
// Month as a decimal number (01-12)
// Month as an integer number (01-12)
case 'c':
instructions.emplace_back(&Action<T>::mysqlMonth);
instructions.emplace_back(&Instruction<T>::mysqlMonth);
out_template += "00";
break;
// Year, divided by 100, zero-padded
case 'C':
instructions.emplace_back(&Action<T>::mysqlCentury);
instructions.emplace_back(&Instruction<T>::mysqlCentury);
out_template += "00";
break;
// Day of month, zero-padded (01-31)
case 'd':
instructions.emplace_back(&Action<T>::mysqlDayOfMonth);
instructions.emplace_back(&Instruction<T>::mysqlDayOfMonth);
out_template += "00";
break;
// Short MM/DD/YY date, equivalent to %m/%d/%y
case 'D':
instructions.emplace_back(&Action<T>::mysqlAmericanDate);
instructions.emplace_back(&Instruction<T>::mysqlAmericanDate);
out_template += "00/00/00";
break;
// Day of month, space-padded ( 1-31) 23
case 'e':
instructions.emplace_back(&Action<T>::mysqlDayOfMonthSpacePadded);
instructions.emplace_back(&Instruction<T>::mysqlDayOfMonthSpacePadded);
out_template += " 0";
break;
@ -997,86 +960,86 @@ public:
case 'f':
{
/// If the time data type has no fractional part, then we print '0' as the fractional part.
instructions.emplace_back(&Action<T>::mysqlFractionalSecond);
instructions.emplace_back(&Instruction<T>::mysqlFractionalSecond);
out_template += String(std::max<UInt32>(1, scale), '0');
break;
}
// Short YYYY-MM-DD date, equivalent to %Y-%m-%d 2001-08-23
case 'F':
instructions.emplace_back(&Action<T>::mysqlISO8601Date);
instructions.emplace_back(&Instruction<T>::mysqlISO8601Date);
out_template += "0000-00-00";
break;
// Last two digits of year of ISO 8601 week number (see %G)
case 'g':
instructions.emplace_back(&Action<T>::mysqlISO8601Year2);
instructions.emplace_back(&Instruction<T>::mysqlISO8601Year2);
out_template += "00";
break;
// Year of ISO 8601 week number (see %V)
case 'G':
instructions.emplace_back(&Action<T>::mysqlISO8601Year4);
instructions.emplace_back(&Instruction<T>::mysqlISO8601Year4);
out_template += "0000";
break;
// Day of the year (001-366) 235
case 'j':
instructions.emplace_back(&Action<T>::mysqlDayOfYear);
instructions.emplace_back(&Instruction<T>::mysqlDayOfYear);
out_template += "000";
break;
// Month as a decimal number (01-12)
// Month as an integer number (01-12)
case 'm':
instructions.emplace_back(&Action<T>::mysqlMonth);
instructions.emplace_back(&Instruction<T>::mysqlMonth);
out_template += "00";
break;
// ISO 8601 weekday as number with Monday as 1 (1-7)
case 'u':
instructions.emplace_back(&Action<T>::mysqlDayOfWeek);
instructions.emplace_back(&Instruction<T>::mysqlDayOfWeek);
out_template += "0";
break;
// ISO 8601 week number (01-53)
case 'V':
instructions.emplace_back(&Action<T>::mysqlISO8601Week);
instructions.emplace_back(&Instruction<T>::mysqlISO8601Week);
out_template += "00";
break;
// Weekday as a decimal number with Sunday as 0 (0-6) 4
// Weekday as an integer number with Sunday as 0 (0-6) 4
case 'w':
instructions.emplace_back(&Action<T>::mysqlDayOfWeek0To6);
instructions.emplace_back(&Instruction<T>::mysqlDayOfWeek0To6);
out_template += "0";
break;
// Full weekday [Monday...Sunday]
case 'W':
instructions.emplace_back(&Action<T>::mysqlDayOfWeekTextLong);
instructions.emplace_back(&Instruction<T>::mysqlDayOfWeekTextLong);
out_template += "Monday";
break;
// Two digits year
case 'y':
instructions.emplace_back(&Action<T>::mysqlYear2);
instructions.emplace_back(&Instruction<T>::mysqlYear2);
out_template += "00";
break;
// Four digits year
case 'Y':
instructions.emplace_back(&Action<T>::mysqlYear4);
instructions.emplace_back(&Instruction<T>::mysqlYear4);
out_template += "0000";
break;
// Quarter (1-4)
case 'Q':
instructions.template emplace_back(&Action<T>::mysqlQuarter);
instructions.template emplace_back(&Instruction<T>::mysqlQuarter);
out_template += "0";
break;
// Offset from UTC timezone as +hhmm or -hhmm
case 'z':
instructions.emplace_back(&Action<T>::mysqlTimezoneOffset);
instructions.emplace_back(&Instruction<T>::mysqlTimezoneOffset);
out_template += "+0000";
break;
@ -1084,79 +1047,79 @@ public:
// Minute (00-59)
case 'M':
add_instruction_or_extra_shift(&Action<T>::mysqlMinute, 2);
add_instruction_or_extra_shift(&Instruction<T>::mysqlMinute, 2);
out_template += "00";
break;
// AM or PM
case 'p':
add_instruction_or_extra_shift(&Action<T>::mysqlAMPM, 2);
add_instruction_or_extra_shift(&Instruction<T>::mysqlAMPM, 2);
out_template += "AM";
break;
// 12-hour HH:MM time, equivalent to %h:%i %p 2:55 PM
case 'r':
add_instruction_or_extra_shift(&Action<T>::mysqlHHMM12, 8);
add_instruction_or_extra_shift(&Instruction<T>::mysqlHHMM12, 8);
out_template += "12:00 AM";
break;
// 24-hour HH:MM time, equivalent to %H:%i 14:55
case 'R':
add_instruction_or_extra_shift(&Action<T>::mysqlHHMM24, 5);
add_instruction_or_extra_shift(&Instruction<T>::mysqlHHMM24, 5);
out_template += "00:00";
break;
// Seconds
case 's':
add_instruction_or_extra_shift(&Action<T>::mysqlSecond, 2);
add_instruction_or_extra_shift(&Instruction<T>::mysqlSecond, 2);
out_template += "00";
break;
// Seconds
case 'S':
add_instruction_or_extra_shift(&Action<T>::mysqlSecond, 2);
add_instruction_or_extra_shift(&Instruction<T>::mysqlSecond, 2);
out_template += "00";
break;
// ISO 8601 time format (HH:MM:SS), equivalent to %H:%i:%S 14:55:02
case 'T':
add_instruction_or_extra_shift(&Action<T>::mysqlISO8601Time, 8);
add_instruction_or_extra_shift(&Instruction<T>::mysqlISO8601Time, 8);
out_template += "00:00:00";
break;
// Hour in 12h format (01-12)
case 'h':
add_instruction_or_extra_shift(&Action<T>::mysqlHour12, 2);
add_instruction_or_extra_shift(&Instruction<T>::mysqlHour12, 2);
out_template += "12";
break;
// Hour in 24h format (00-23)
case 'H':
add_instruction_or_extra_shift(&Action<T>::mysqlHour24, 2);
add_instruction_or_extra_shift(&Instruction<T>::mysqlHour24, 2);
out_template += "00";
break;
// Minute of hour range [0, 59]
case 'i':
add_instruction_or_extra_shift(&Action<T>::mysqlMinute, 2);
add_instruction_or_extra_shift(&Instruction<T>::mysqlMinute, 2);
out_template += "00";
break;
// Hour in 12h format (01-12)
case 'I':
add_instruction_or_extra_shift(&Action<T>::mysqlHour12, 2);
add_instruction_or_extra_shift(&Instruction<T>::mysqlHour12, 2);
out_template += "12";
break;
// Hour in 24h format (00-23)
case 'k':
add_instruction_or_extra_shift(&Action<T>::mysqlHour24, 2);
add_instruction_or_extra_shift(&Instruction<T>::mysqlHour24, 2);
out_template += "00";
break;
// Hour in 12h format (01-12)
case 'l':
add_instruction_or_extra_shift(&Action<T>::mysqlHour12, 2);
add_instruction_or_extra_shift(&Instruction<T>::mysqlHour12, 2);
out_template += "12";
break;
@ -1209,7 +1172,7 @@ public:
}
template <typename T>
size_t parseJodaFormat(const String & format, std::vector<Action<T>> & instructions, UInt32, String &) const
size_t parseJodaFormat(const String & format, std::vector<Instruction<T>> & instructions, UInt32, String &) const
{
/// If the argument was DateTime, add instruction for printing. If it was date, just append default literal
auto add_instruction = [&](auto && func [[maybe_unused]], const String & default_literal [[maybe_unused]])
@ -1217,13 +1180,12 @@ public:
if constexpr (std::is_same_v<T, UInt32> || std::is_same_v<T, Int64>)
instructions.emplace_back(func);
else
instructions.emplace_back(std::bind_front(&Action<T>::template jodaLiteral<String>, default_literal));
instructions.emplace_back(std::bind_front(&Instruction<T>::template jodaLiteral<String>, default_literal));
};
size_t reserve_size = 0;
const char * pos = format.data();
const char * end = pos + format.size();
const char * end = format.data() + format.size();
while (pos < end)
{
const char * cur_token = pos;
@ -1235,7 +1197,7 @@ public:
if (pos + 1 < end && *(pos + 1) == '\'')
{
std::string_view literal(cur_token, 1);
instructions.emplace_back(std::bind_front(&Action<T>::template jodaLiteral<decltype(literal)>, literal));
instructions.emplace_back(std::bind_front(&Instruction<T>::template jodaLiteral<decltype(literal)>, literal));
++reserve_size;
pos += 2;
}
@ -1251,7 +1213,7 @@ public:
{
std::string_view literal(cur_token + i, 1);
instructions.emplace_back(
std::bind_front(&Action<T>::template jodaLiteral<decltype(literal)>, literal));
std::bind_front(&Instruction<T>::template jodaLiteral<decltype(literal)>, literal));
++reserve_size;
if (*(cur_token + i) == '\'')
i += 1;
@ -1272,115 +1234,115 @@ public:
switch (*cur_token)
{
case 'G':
instructions.emplace_back(std::bind_front(&Action<T>::jodaEra, repetitions));
instructions.emplace_back(std::bind_front(&Instruction<T>::jodaEra, repetitions));
reserve_size += repetitions <= 3 ? 2 : 13;
break;
case 'C':
instructions.emplace_back(std::bind_front(&Action<T>::jodaCenturyOfEra, repetitions));
instructions.emplace_back(std::bind_front(&Instruction<T>::jodaCenturyOfEra, repetitions));
/// Year range [1900, 2299]
reserve_size += std::max(repetitions, 2);
break;
case 'Y':
instructions.emplace_back(std::bind_front(&Action<T>::jodaYearOfEra, repetitions));
instructions.emplace_back(std::bind_front(&Instruction<T>::jodaYearOfEra, repetitions));
/// Year range [1900, 2299]
reserve_size += repetitions == 2 ? 2 : std::max(repetitions, 4);
break;
case 'x':
instructions.emplace_back(std::bind_front(&Action<T>::jodaWeekYear, repetitions));
instructions.emplace_back(std::bind_front(&Instruction<T>::jodaWeekYear, repetitions));
/// weekyear range [1900, 2299]
reserve_size += std::max(repetitions, 4);
break;
case 'w':
instructions.emplace_back(std::bind_front(&Action<T>::jodaWeekOfWeekYear, repetitions));
instructions.emplace_back(std::bind_front(&Instruction<T>::jodaWeekOfWeekYear, repetitions));
/// Week of weekyear range [1, 52]
reserve_size += std::max(repetitions, 2);
break;
case 'e':
instructions.emplace_back(std::bind_front(&Action<T>::jodaDayOfWeek1Based, repetitions));
instructions.emplace_back(std::bind_front(&Instruction<T>::jodaDayOfWeek1Based, repetitions));
/// Day of week range [1, 7]
reserve_size += std::max(repetitions, 1);
break;
case 'E':
instructions.emplace_back(std::bind_front(&Action<T>::jodaDayOfWeekText, repetitions));
instructions.emplace_back(std::bind_front(&Instruction<T>::jodaDayOfWeekText, repetitions));
/// Maximum length of short name is 3, maximum length of full name is 9.
reserve_size += repetitions <= 3 ? 3 : 9;
break;
case 'y':
instructions.emplace_back(std::bind_front(&Action<T>::jodaYear, repetitions));
instructions.emplace_back(std::bind_front(&Instruction<T>::jodaYear, repetitions));
/// Year range [1900, 2299]
reserve_size += repetitions == 2 ? 2 : std::max(repetitions, 4);
break;
case 'D':
instructions.emplace_back(std::bind_front(&Action<T>::jodaDayOfYear, repetitions));
instructions.emplace_back(std::bind_front(&Instruction<T>::jodaDayOfYear, repetitions));
/// Day of year range [1, 366]
reserve_size += std::max(repetitions, 3);
break;
case 'M':
if (repetitions <= 2)
{
instructions.emplace_back(std::bind_front(&Action<T>::jodaMonthOfYear, repetitions));
instructions.emplace_back(std::bind_front(&Instruction<T>::jodaMonthOfYear, repetitions));
/// Month of year range [1, 12]
reserve_size += 2;
}
else
{
instructions.emplace_back(std::bind_front(&Action<T>::jodaMonthOfYearText, repetitions));
instructions.emplace_back(std::bind_front(&Instruction<T>::jodaMonthOfYearText, repetitions));
/// Maximum length of short name is 3, maximum length of full name is 9.
reserve_size += repetitions <= 3 ? 3 : 9;
}
break;
case 'd':
instructions.emplace_back(std::bind_front(&Action<T>::jodaDayOfMonth, repetitions));
instructions.emplace_back(std::bind_front(&Instruction<T>::jodaDayOfMonth, repetitions));
/// Day of month range [1, 31]
reserve_size += std::max(repetitions, 3);
break;
case 'a':
/// Default half day of day is "AM"
add_instruction(std::bind_front(&Action<T>::jodaHalfDayOfDay, repetitions), "AM");
add_instruction(std::bind_front(&Instruction<T>::jodaHalfDayOfDay, repetitions), "AM");
reserve_size += 2;
break;
case 'K':
/// Default hour of half day is 0
add_instruction(
std::bind_front(&Action<T>::jodaHourOfHalfDay, repetitions), padValue(0, repetitions));
std::bind_front(&Instruction<T>::jodaHourOfHalfDay, repetitions), padValue(0, repetitions));
/// Hour of half day range [0, 11]
reserve_size += std::max(repetitions, 2);
break;
case 'h':
/// Default clock hour of half day is 12
add_instruction(
std::bind_front(&Action<T>::jodaClockHourOfHalfDay, repetitions),
std::bind_front(&Instruction<T>::jodaClockHourOfHalfDay, repetitions),
padValue(12, repetitions));
/// Clock hour of half day range [1, 12]
reserve_size += std::max(repetitions, 2);
break;
case 'H':
/// Default hour of day is 0
add_instruction(std::bind_front(&Action<T>::jodaHourOfDay, repetitions), padValue(0, repetitions));
add_instruction(std::bind_front(&Instruction<T>::jodaHourOfDay, repetitions), padValue(0, repetitions));
/// Hour of day range [0, 23]
reserve_size += std::max(repetitions, 2);
break;
case 'k':
/// Default clock hour of day is 24
add_instruction(std::bind_front(&Action<T>::jodaClockHourOfDay, repetitions), padValue(24, repetitions));
add_instruction(std::bind_front(&Instruction<T>::jodaClockHourOfDay, repetitions), padValue(24, repetitions));
/// Clock hour of day range [1, 24]
reserve_size += std::max(repetitions, 2);
break;
case 'm':
/// Default minute of hour is 0
add_instruction(std::bind_front(&Action<T>::jodaMinuteOfHour, repetitions), padValue(0, repetitions));
add_instruction(std::bind_front(&Instruction<T>::jodaMinuteOfHour, repetitions), padValue(0, repetitions));
/// Minute of hour range [0, 59]
reserve_size += std::max(repetitions, 2);
break;
case 's':
/// Default second of minute is 0
add_instruction(std::bind_front(&Action<T>::jodaSecondOfMinute, repetitions), padValue(0, repetitions));
add_instruction(std::bind_front(&Instruction<T>::jodaSecondOfMinute, repetitions), padValue(0, repetitions));
/// Second of minute range [0, 59]
reserve_size += std::max(repetitions, 2);
break;
case 'S':
/// Default fraction of second is 0
instructions.emplace_back(std::bind_front(&Action<T>::jodaFractionOfSecond, repetitions));
instructions.emplace_back(std::bind_front(&Instruction<T>::jodaFractionOfSecond, repetitions));
/// 'S' repetitions range [0, 9]
reserve_size += repetitions <= 9 ? repetitions : 9;
break;
@ -1388,7 +1350,7 @@ public:
if (repetitions <= 3)
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Short name time zone is not yet supported");
instructions.emplace_back(std::bind_front(&Action<T>::jodaTimezone, repetitions));
instructions.emplace_back(std::bind_front(&Instruction<T>::jodaTimezone, repetitions));
/// Longest length of full name of time zone is 32.
reserve_size += 32;
break;
@ -1399,7 +1361,7 @@ public:
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "format is not supported for {}", String(cur_token, repetitions));
std::string_view literal(cur_token, pos - cur_token);
instructions.emplace_back(std::bind_front(&Action<T>::template jodaLiteral<decltype(literal)>, literal));
instructions.emplace_back(std::bind_front(&Instruction<T>::template jodaLiteral<decltype(literal)>, literal));
reserve_size += pos - cur_token;
break;
}

View File

@ -0,0 +1,67 @@
#include <Functions/IFunction.h>
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionHelpers.h>
namespace DB
{
namespace ErrorCodes
{
extern const int ILLEGAL_COLUMN;
}
namespace
{
class FunctionGetSubcolumn : public IFunction
{
public:
static constexpr auto name = "getSubcolumn";
static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionGetSubcolumn>(); }
String getName() const override { return name; }
size_t getNumberOfArguments() const override { return 2; }
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo &) const override { return true; }
bool useDefaultImplementationForConstants() const override { return true; }
ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; }
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
{
auto subcolumn_name = getSubcolumnName(arguments);
return arguments[0].type->getSubcolumnType(subcolumn_name);
}
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
{
auto subcolumn_name = getSubcolumnName(arguments);
return arguments[0].type->getSubcolumn(subcolumn_name, arguments[0].column);
}
private:
static std::string_view getSubcolumnName(const ColumnsWithTypeAndName & arguments)
{
const auto * column = arguments[1].column.get();
if (!isString(arguments[1].type) || !column || !checkAndGetColumnConstStringOrFixedString(column))
throw Exception(ErrorCodes::ILLEGAL_COLUMN,
"The second argument of function {} should be a constant string with the name of a subcolumn", name);
return column->getDataAt(0).toView();
}
};
}
REGISTER_FUNCTION(GetSubcolumn)
{
factory.registerFunction<FunctionGetSubcolumn>({
R"(
Receives an expression or identifier and a constant string with the name of a subcolumn.
Returns the requested subcolumn extracted from the expression.
)",
Documentation::Examples{{"getSubcolumn", "SELECT getSubcolumn(array_col, 'size0'), getSubcolumn(tuple_col, 'elem_name')"}},
Documentation::Categories{"OtherFunctions"}
});
}
}

View File

@ -0,0 +1,44 @@
#pragma once
#include <base/types.h>
namespace DB
{
/// Counts the number of literal characters in a Joda format string until the next closing literal
/// single quote. Returns -1 if no closing single quote was found.
/// In a Joda format string (https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html)
/// literal content must be quoted with single quotes, and two consecutive single quotes denote one literal single quote.
/// For example:
/// Format string: "'aaaa'", unescaped literal: "aaaa";
/// Format string: "'aa''aa'", unescaped literal: "aa'aa";
/// Format string: "'aaa''aa" is not valid because the closing single quote is missing.
inline Int64 numLiteralChars(const char * cur, const char * end)
{
bool found = false;
Int64 count = 0;
while (cur < end)
{
if (*cur == '\'')
{
if (cur + 1 < end && *(cur + 1) == '\'')
{
count += 2;
cur += 2;
}
else
{
found = true;
break;
}
}
else
{
++count;
++cur;
}
}
return found ? count : -1;
}
}
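A small usage check of numLiteralChars against the examples from the comment above (assumes it is compiled inside the ClickHouse tree so that <Functions/numLiteralChars.h> is available; the cursor is positioned right after the opening single quote):
#include <cassert>
#include <string_view>
#include <Functions/numLiteralChars.h>

int main()
{
    auto count = [](std::string_view s) { return DB::numLiteralChars(s.data(), s.data() + s.size()); };

    assert(count("aaaa'") == 4);    // "'aaaa'"   -> literal "aaaa"
    assert(count("aa''aa'") == 6);  // "'aa''aa'" -> literal "aa'aa", the "''" pair counts as two characters
    assert(count("aaa''aa") == -1); // "'aaa''aa" -> no closing single quote
}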

File diff suppressed because it is too large Load Diff

View File

@ -159,6 +159,8 @@ namespace detail
if (out_stream_callback)
request.setChunkedTransferEncoding(true);
else if (method == Poco::Net::HTTPRequest::HTTP_POST)
request.setContentLength(0); /// No callback - no body
for (auto & [header, value] : http_header_entries)
request.set(header, value);

View File

@ -23,6 +23,8 @@
namespace ProfileEvents
{
extern const Event WriteBufferFromS3Bytes;
extern const Event WriteBufferFromS3Microseconds;
extern const Event WriteBufferFromS3RequestsErrors;
extern const Event S3WriteBytes;
extern const Event S3CreateMultipartUpload;
@ -200,7 +202,11 @@ void WriteBufferFromS3::createMultipartUpload()
if (write_settings.for_object_storage)
ProfileEvents::increment(ProfileEvents::DiskS3CreateMultipartUpload);
Stopwatch watch;
auto outcome = client_ptr->CreateMultipartUpload(req);
watch.stop();
ProfileEvents::increment(ProfileEvents::WriteBufferFromS3Microseconds, watch.elapsedMicroseconds());
if (outcome.IsSuccess())
{
@ -208,7 +214,10 @@ void WriteBufferFromS3::createMultipartUpload()
LOG_TRACE(log, "Multipart upload has created. Bucket: {}, Key: {}, Upload id: {}", bucket, key, multipart_upload_id);
}
else
{
ProfileEvents::increment(ProfileEvents::WriteBufferFromS3RequestsErrors, 1);
throw S3Exception(outcome.GetError().GetMessage(), outcome.GetError().GetErrorType());
}
}
void WriteBufferFromS3::writePart()
@ -345,9 +354,13 @@ void WriteBufferFromS3::processUploadRequest(UploadPartTask & task)
ResourceCost cost = task.req.GetContentLength();
ResourceGuard rlock(write_settings.resource_link, cost);
Stopwatch watch;
auto outcome = client_ptr->UploadPart(task.req);
watch.stop();
rlock.unlock(); // Avoid acquiring other locks under resource lock
ProfileEvents::increment(ProfileEvents::WriteBufferFromS3Microseconds, watch.elapsedMicroseconds());
if (outcome.IsSuccess())
{
task.tag = outcome.GetResult().GetETag();
@ -356,6 +369,7 @@ void WriteBufferFromS3::processUploadRequest(UploadPartTask & task)
}
else
{
ProfileEvents::increment(ProfileEvents::WriteBufferFromS3RequestsErrors, 1);
write_settings.resource_link.accumulate(cost); // We assume no resource was used in case of failure
throw S3Exception(outcome.GetError().GetMessage(), outcome.GetError().GetErrorType());
}
@ -391,27 +405,41 @@ void WriteBufferFromS3::completeMultipartUpload()
if (write_settings.for_object_storage)
ProfileEvents::increment(ProfileEvents::DiskS3CompleteMultipartUpload);
Stopwatch watch;
auto outcome = client_ptr->CompleteMultipartUpload(req);
watch.stop();
ProfileEvents::increment(ProfileEvents::WriteBufferFromS3Microseconds, watch.elapsedMicroseconds());
if (outcome.IsSuccess())
{
LOG_TRACE(log, "Multipart upload has completed. Bucket: {}, Key: {}, Upload_id: {}, Parts: {}", bucket, key, multipart_upload_id, tags.size());
break;
}
else if (outcome.GetError().GetErrorType() == Aws::S3::S3Errors::NO_SUCH_KEY)
{
/// For unknown reason, at least MinIO can respond with NO_SUCH_KEY for put requests
/// BTW, NO_SUCH_UPLOAD is expected error and we shouldn't retry it
LOG_INFO(log, "Multipart upload failed with NO_SUCH_KEY error for Bucket: {}, Key: {}, Upload_id: {}, Parts: {}, will retry", bucket, key, multipart_upload_id, tags.size());
return;
}
else
{
throw S3Exception(
outcome.GetError().GetErrorType(),
"Message: {}, Key: {}, Bucket: {}, Tags: {}",
outcome.GetError().GetMessage(), key, bucket, fmt::join(tags.begin(), tags.end(), " "));
ProfileEvents::increment(ProfileEvents::WriteBufferFromS3RequestsErrors, 1);
if (outcome.GetError().GetErrorType() == Aws::S3::S3Errors::NO_SUCH_KEY)
{
/// For unknown reason, at least MinIO can respond with NO_SUCH_KEY for put requests
/// BTW, NO_SUCH_UPLOAD is expected error and we shouldn't retry it
LOG_INFO(log, "Multipart upload failed with NO_SUCH_KEY error for Bucket: {}, Key: {}, Upload_id: {}, Parts: {}, will retry", bucket, key, multipart_upload_id, tags.size());
}
else
{
throw S3Exception(
outcome.GetError().GetErrorType(),
"Message: {}, Key: {}, Bucket: {}, Tags: {}",
outcome.GetError().GetMessage(), key, bucket, fmt::join(tags.begin(), tags.end(), " "));
}
}
}
throw S3Exception(
Aws::S3::S3Errors::NO_SUCH_KEY,
"Message: Multipart upload failed with NO_SUCH_KEY error, retries {}, Key: {}, Bucket: {}",
max_retry, key, bucket);
}
void WriteBufferFromS3::makeSinglepartUpload()
@ -501,30 +529,43 @@ void WriteBufferFromS3::processPutRequest(const PutObjectTask & task)
ResourceCost cost = task.req.GetContentLength();
ResourceGuard rlock(write_settings.resource_link, cost);
Stopwatch watch;
auto outcome = client_ptr->PutObject(task.req);
watch.stop();
rlock.unlock();
ProfileEvents::increment(ProfileEvents::WriteBufferFromS3Microseconds, watch.elapsedMicroseconds());
bool with_pool = static_cast<bool>(schedule);
if (outcome.IsSuccess())
{
LOG_TRACE(log, "Single part upload has completed. Bucket: {}, Key: {}, Object size: {}, WithPool: {}", bucket, key, task.req.GetContentLength(), with_pool);
break;
}
else if (outcome.GetError().GetErrorType() == Aws::S3::S3Errors::NO_SUCH_KEY)
{
write_settings.resource_link.accumulate(cost); // We assume no resource was used in case of failure
/// For unknown reason, at least MinIO can respond with NO_SUCH_KEY for put requests
LOG_INFO(log, "Single part upload failed with NO_SUCH_KEY error for Bucket: {}, Key: {}, Object size: {}, WithPool: {}, will retry", bucket, key, task.req.GetContentLength(), with_pool);
return;
}
else
{
write_settings.resource_link.accumulate(cost); // We assume no resource was used in case of failure
throw S3Exception(
outcome.GetError().GetErrorType(),
"Message: {}, Key: {}, Bucket: {}, Object size: {}, WithPool: {}",
outcome.GetError().GetMessage(), key, bucket, task.req.GetContentLength(), with_pool);
ProfileEvents::increment(ProfileEvents::WriteBufferFromS3RequestsErrors, 1);
if (outcome.GetError().GetErrorType() == Aws::S3::S3Errors::NO_SUCH_KEY)
{
write_settings.resource_link.accumulate(cost); // We assume no resource was used in case of failure
/// For unknown reason, at least MinIO can respond with NO_SUCH_KEY for put requests
LOG_INFO(log, "Single part upload failed with NO_SUCH_KEY error for Bucket: {}, Key: {}, Object size: {}, WithPool: {}, will retry", bucket, key, task.req.GetContentLength(), with_pool);
}
else
{
write_settings.resource_link.accumulate(cost); // We assume no resource was used in case of failure
throw S3Exception(
outcome.GetError().GetErrorType(),
"Message: {}, Key: {}, Bucket: {}, Object size: {}, WithPool: {}",
outcome.GetError().GetMessage(), key, bucket, task.req.GetContentLength(), with_pool);
}
}
}
throw S3Exception(
Aws::S3::S3Errors::NO_SUCH_KEY,
"Message: Single part upload failed with NO_SUCH_KEY error, retries {}, Key: {}, Bucket: {}",
max_retry, key, bucket);
}
void WriteBufferFromS3::waitForReadyBackGroundTasks()
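Both upload paths above share the same retry shape. A minimal sketch of that pattern, not part of the diff, using hypothetical names and no AWS SDK types (try_upload is assumed to perform one attempt and report the outcome):

#include <cstddef>
#include <functional>
#include <stdexcept>

enum class UploadError { None, NoSuchKey, Other };

/// Retry only the NO_SUCH_KEY outcome (observed with MinIO), fail fast on any
/// other error, and give up after max_retry attempts.
void uploadWithRetries(size_t max_retry, const std::function<UploadError()> & try_upload)
{
    for (size_t attempt = 0; attempt < max_retry; ++attempt)
    {
        UploadError error = try_upload();
        if (error == UploadError::None)
            return;                                     /// success, stop retrying
        if (error == UploadError::Other)
            throw std::runtime_error("upload failed");  /// not retryable
        /// NO_SUCH_KEY: the real code logs here, then retries
    }
    throw std::runtime_error("upload failed with NO_SUCH_KEY after all retries");
}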

View File

@ -6,6 +6,7 @@ namespace DB
namespace ErrorCodes
{
extern const int ZLIB_INFLATE_FAILED;
extern const int ARGUMENT_OUT_OF_BOUND;
}
ZlibInflatingReadBuffer::ZlibInflatingReadBuffer(
@ -17,6 +18,11 @@ ZlibInflatingReadBuffer::ZlibInflatingReadBuffer(
: CompressedReadBufferWrapper(std::move(in_), buf_size, existing_memory, alignment)
, eof_flag(false)
{
if (buf_size > max_buffer_size)
throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND,
"Zlib does not support decompression with buffer size greater than {}, got buffer size: {}",
max_buffer_size, buf_size);
zstr.zalloc = nullptr;
zstr.zfree = nullptr;
zstr.opaque = nullptr;
@ -31,10 +37,7 @@ ZlibInflatingReadBuffer::ZlibInflatingReadBuffer(
window_bits += 16;
}
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wold-style-cast"
int rc = inflateInit2(&zstr, window_bits);
#pragma GCC diagnostic pop
if (rc != Z_OK)
throw Exception(ErrorCodes::ZLIB_INFLATE_FAILED, "inflateInit2 failed: {}; zlib version: {}.", zError(rc), ZLIB_VERSION);
@ -61,16 +64,22 @@ bool ZlibInflatingReadBuffer::nextImpl()
{
in->nextIfAtEnd();
zstr.next_in = reinterpret_cast<unsigned char *>(in->position());
zstr.avail_in = static_cast<unsigned>(in->buffer().end() - in->position());
zstr.avail_in = static_cast<BufferSizeType>(std::min(
static_cast<UInt64>(in->buffer().end() - in->position()),
static_cast<UInt64>(max_buffer_size)));
}
/// Init output bytes (the place where decompressed data will be written)
zstr.next_out = reinterpret_cast<unsigned char *>(internal_buffer.begin());
zstr.avail_out = static_cast<unsigned>(internal_buffer.size());
zstr.avail_out = static_cast<BufferSizeType>(internal_buffer.size());
size_t old_total_in = zstr.total_in;
int rc = inflate(&zstr, Z_NO_FLUSH);
/// Move the input stream to the position where reading stopped
in->position() = in->buffer().end() - zstr.avail_in;
size_t bytes_read = zstr.total_in - old_total_in;
in->position() += bytes_read;
/// Change the size of the working buffer (its size equals the internal_buffer size minus the unused output bytes)
working_buffer.resize(internal_buffer.size() - zstr.avail_out);
@ -94,9 +103,10 @@ bool ZlibInflatingReadBuffer::nextImpl()
return true;
}
}
/// If it is neither the end of the stream nor OK, something went wrong; throw an exception
if (rc != Z_OK)
throw Exception(ErrorCodes::ZLIB_INFLATE_FAILED, "inflateReset failed: {}", zError(rc));
throw Exception(ErrorCodes::ZLIB_INFLATE_FAILED, "inflate failed: {}", zError(rc));
}
while (working_buffer.empty());

View File

@ -4,6 +4,7 @@
#include <IO/CompressedReadBufferWrapper.h>
#include <IO/CompressionMethod.h>
#include <limits>
#include <zlib.h>
@ -33,6 +34,11 @@ private:
z_stream zstr;
bool eof_flag;
/// Limit size of buffer because zlib uses
/// UInt32 for sizes of internal buffers.
using BufferSizeType = decltype(zstr.avail_in);
static constexpr auto max_buffer_size = std::numeric_limits<BufferSizeType>::max();
};
}
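A minimal sketch of the size limit introduced above, not part of the diff and assuming only <zlib.h>: z_stream::avail_in and avail_out are of type uInt (typically 32 bits), so any 64-bit byte count has to be clamped before it is handed to zlib.

#include <algorithm>
#include <cstdint>
#include <limits>
#include <zlib.h>

using ZlibBufferSizeType = decltype(z_stream::avail_in);   /// uInt, typically 32 bits
constexpr uint64_t max_zlib_buffer_size = std::numeric_limits<ZlibBufferSizeType>::max();

/// Clamp an arbitrary byte count to what a single z_stream buffer field can describe.
inline ZlibBufferSizeType clampForZlib(uint64_t bytes)
{
    return static_cast<ZlibBufferSizeType>(std::min(bytes, max_zlib_buffer_size));
}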

View File

@ -9,6 +9,7 @@
#include <Functions/materialize.h>
#include <Functions/FunctionsLogical.h>
#include <Functions/CastOverloadResolver.h>
#include <Functions/indexHint.h>
#include <Interpreters/Context.h>
#include <Interpreters/ArrayJoinAction.h>
#include <IO/WriteBufferFromString.h>
@ -188,9 +189,9 @@ const ActionsDAG::Node & ActionsDAG::addArrayJoin(const Node & child, std::strin
}
const ActionsDAG::Node & ActionsDAG::addFunction(
const FunctionOverloadResolverPtr & function,
NodeRawConstPtrs children,
std::string result_name)
const FunctionOverloadResolverPtr & function,
NodeRawConstPtrs children,
std::string result_name)
{
auto [arguments, all_const] = getFunctionArguments(children);
@ -1364,6 +1365,83 @@ void ActionsDAG::mergeInplace(ActionsDAG && second)
first.projected_output = second.projected_output;
}
void ActionsDAG::mergeNodes(ActionsDAG && second)
{
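/// Merge nodes from `second` into this DAG: nodes whose result_name already exists here are reused,
/// the remaining nodes are spliced over from `second`, and spliced INPUT nodes are also appended to `inputs`.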
std::unordered_map<std::string, const ActionsDAG::Node *> node_name_to_node;
for (auto & node : nodes)
node_name_to_node.emplace(node.result_name, &node);
struct Frame
{
ActionsDAG::Node * node = nullptr;
bool visited_children = false;
};
std::unordered_map<const ActionsDAG::Node *, ActionsDAG::Node *> const_node_to_node;
for (auto & node : second.nodes)
const_node_to_node.emplace(&node, &node);
std::vector<Frame> nodes_to_process;
nodes_to_process.reserve(second.getOutputs().size());
for (auto & node : second.getOutputs())
nodes_to_process.push_back({const_node_to_node.at(node), false /*visited_children*/});
std::unordered_set<const ActionsDAG::Node *> nodes_to_move_from_second_dag;
while (!nodes_to_process.empty())
{
auto & node_to_process = nodes_to_process.back();
auto * node = node_to_process.node;
auto node_it = node_name_to_node.find(node->result_name);
if (node_it != node_name_to_node.end())
{
nodes_to_process.pop_back();
continue;
}
if (!node_to_process.visited_children)
{
node_to_process.visited_children = true;
for (auto & child : node->children)
nodes_to_process.push_back({const_node_to_node.at(child), false /*visited_children*/});
/// If node has children process them first
if (!node->children.empty())
continue;
}
for (auto & child : node->children)
child = node_name_to_node.at(child->result_name);
node_name_to_node.emplace(node->result_name, node);
nodes_to_move_from_second_dag.insert(node);
nodes_to_process.pop_back();
}
if (nodes_to_move_from_second_dag.empty())
return;
auto second_nodes_end = second.nodes.end();
for (auto second_node_it = second.nodes.begin(); second_node_it != second_nodes_end;)
{
if (!nodes_to_move_from_second_dag.contains(&(*second_node_it)))
{
++second_node_it;
continue;
}
auto node_to_move_it = second_node_it;
++second_node_it;
nodes.splice(nodes.end(), second.nodes, node_to_move_it);
if (node_to_move_it->type == ActionType::INPUT)
inputs.push_back(&(*node_to_move_it));
}
}
ActionsDAG::SplitResult ActionsDAG::split(std::unordered_set<const Node *> split_nodes) const
{
/// Split DAG into two parts.
@ -2193,7 +2271,8 @@ bool ActionsDAG::isSortingPreserved(
ActionsDAGPtr ActionsDAG::buildFilterActionsDAG(
const NodeRawConstPtrs & filter_nodes,
const std::unordered_map<std::string, ColumnWithTypeAndName> & node_name_to_input_node_column,
const ContextPtr & context)
const ContextPtr & context,
bool single_output_condition_node)
{
if (filter_nodes.empty())
return nullptr;
@ -2281,13 +2360,35 @@ ActionsDAGPtr ActionsDAG::buildFilterActionsDAG(
NodeRawConstPtrs function_children;
function_children.reserve(node->children.size());
FunctionOverloadResolverPtr function_overload_resolver;
if (node->function_base->getName() == "indexHint")
{
ActionsDAG::NodeRawConstPtrs children;
if (const auto * adaptor = typeid_cast<const FunctionToFunctionBaseAdaptor *>(node->function_base.get()))
{
if (const auto * index_hint = typeid_cast<const FunctionIndexHint *>(adaptor->getFunction().get()))
{
auto index_hint_filter_dag = buildFilterActionsDAG(index_hint->getActions()->getOutputs(),
node_name_to_input_node_column,
context,
false /*single_output_condition_node*/);
auto index_hint_function_clone = std::make_shared<FunctionIndexHint>();
index_hint_function_clone->setActions(std::move(index_hint_filter_dag));
function_overload_resolver = std::make_shared<FunctionToOverloadResolverAdaptor>(std::move(index_hint_function_clone));
}
}
}
for (const auto & child : node->children)
function_children.push_back(node_to_result_node.find(child)->second);
auto [arguments, all_const] = getFunctionArguments(function_children);
auto function_base = function_overload_resolver ? function_overload_resolver->build(arguments) : node->function_base;
result_node = &result_dag->addFunctionImpl(
node->function_base,
function_base,
std::move(function_children),
std::move(arguments),
{},
@ -2307,7 +2408,7 @@ ActionsDAGPtr ActionsDAG::buildFilterActionsDAG(
for (const auto & node : filter_nodes)
result_dag_outputs.push_back(node_to_result_node.find(node)->second);
if (result_dag_outputs.size() > 1)
if (result_dag_outputs.size() > 1 && single_output_condition_node)
{
auto function_builder = FunctionFactory::instance().get("and", context);
result_dag_outputs = { &result_dag->addFunction(function_builder, result_dag_outputs, {}) };

View File

@ -290,6 +290,9 @@ public:
/// So that pointers to nodes are kept valid.
void mergeInplace(ActionsDAG && second);
/// Merge current nodes with specified dag nodes
void mergeNodes(ActionsDAG && second);
using SplitResult = std::pair<ActionsDAGPtr, ActionsDAGPtr>;
/// Split ActionsDAG into two DAGs, where first part contains all nodes from split_nodes and their children.
@ -344,15 +347,18 @@ public:
* Additionally, during dag construction, if a node has a name that exists in the node_name_to_input_node_column map argument,
* that node is represented in the final dag as an INPUT node with the specified column.
*
* Result dag has only a single output node:
* If single_output_condition_node = true, the result dag has a single output node:
* 1. If there is a single filter node, the result dag output will contain this node.
* 2. If there are multiple filter nodes, the result dag output will contain a single `and` function node
* whose children will be the filter nodes.
*
* If single_output_condition_node = false, the result dag has multiple output nodes.
*/
static ActionsDAGPtr buildFilterActionsDAG(
const NodeRawConstPtrs & filter_nodes,
const std::unordered_map<std::string, ColumnWithTypeAndName> & node_name_to_input_node_column,
const ContextPtr & context);
const ContextPtr & context,
bool single_output_condition_node = true);
private:
NodeRawConstPtrs getParents(const Node * target) const;
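As a toy illustration of the flag documented above, not part of the diff and using plain strings rather than the ActionsDAG API: with single_output_condition_node = true, several filter outputs are folded under one `and`, otherwise they are returned as separate outputs.

#include <string>
#include <vector>

/// Models only the output shaping; the real code adds an `and` function node to the DAG.
std::vector<std::string> shapeFilterOutputs(std::vector<std::string> filters, bool single_output_condition_node)
{
    if (filters.size() <= 1 || !single_output_condition_node)
        return filters;

    std::string combined = "and(";
    for (size_t i = 0; i < filters.size(); ++i)
    {
        combined += filters[i];
        if (i + 1 != filters.size())
            combined += ", ";
    }
    combined += ")";
    return {combined};
}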

View File

@ -0,0 +1,100 @@
#include <Interpreters/ComparisonTupleEliminationVisitor.h>
#include <Parsers/ASTSelectQuery.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTLiteral.h>
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
}
namespace
{
ASTs splitTuple(const ASTPtr & node)
{
if (const auto * func = node->as<ASTFunction>(); func && func->name == "tuple")
return func->arguments->children;
if (const auto * literal = node->as<ASTLiteral>(); literal && literal->value.getType() == Field::Types::Tuple)
{
ASTs result;
const auto & tuple = literal->value.get<const Tuple &>();
for (const auto & child : tuple)
result.emplace_back(std::make_shared<ASTLiteral>(child));
return result;
}
return {};
}
ASTPtr concatWithAnd(const ASTs & nodes)
{
if (nodes.empty())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot concat empty list of nodes");
if (nodes.size() == 1)
return nodes[0];
auto result = makeASTFunction("and");
result->arguments->children = nodes;
return result;
}
class SplitTupleComparsionExpressionMatcher
{
public:
using Data = ComparisonTupleEliminationMatcher::Data;
static bool needChildVisit(const ASTPtr &, const ASTPtr &) { return true; }
static void visit(ASTPtr & ast, Data &)
{
auto * func = ast->as<ASTFunction>();
if (!func || func->arguments->children.size() != 2)
return;
if (func->name != "equals" && func->name != "notEquals")
return;
auto lhs = splitTuple(func->arguments->children[0]);
auto rhs = splitTuple(func->arguments->children[1]);
if (lhs.size() != rhs.size() || lhs.empty())
return;
ASTs new_args;
new_args.reserve(lhs.size());
for (size_t i = 0; i < lhs.size(); ++i)
{
new_args.emplace_back(makeASTFunction("equals", lhs[i], rhs[i]));
}
if (func->name == "notEquals")
ast = makeASTFunction("not", concatWithAnd(new_args));
else
ast = concatWithAnd(new_args);
}
};
using SplitTupleComparsionExpressionVisitor = InDepthNodeVisitor<SplitTupleComparsionExpressionMatcher, true>;
}
bool ComparisonTupleEliminationMatcher::needChildVisit(ASTPtr &, const ASTPtr &)
{
return true;
}
void ComparisonTupleEliminationMatcher::visit(ASTPtr & ast, Data & data)
{
auto * select_ast = ast->as<ASTSelectQuery>();
if (!select_ast || !select_ast->where())
return;
if (select_ast->where())
SplitTupleComparsionExpressionVisitor(data).visit(select_ast->refWhere());
}
}
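A toy model of the rewrite performed by the visitor above, not part of the diff and using plain strings rather than the AST API: an element-wise conjunction is built for `equals` and wrapped in `not` for `notEquals`; empty or size-mismatched tuples are left untouched.

#include <string>
#include <vector>

std::string rewriteTupleComparison(const std::vector<std::string> & lhs,
                                   const std::vector<std::string> & rhs,
                                   bool not_equals)
{
    /// The visitor bails out when the tuples are empty or their sizes differ.
    if (lhs.empty() || lhs.size() != rhs.size())
        return {};

    std::string conjunction;
    for (size_t i = 0; i < lhs.size(); ++i)
    {
        if (i != 0)
            conjunction += " AND ";
        conjunction += lhs[i] + " = " + rhs[i];
    }
    return not_equals ? "NOT (" + conjunction + ")" : conjunction;
}

For example, rewriteTupleComparison({"id", "value"}, {"1", "'Value'"}, false) yields id = 1 AND value = 'Value', matching the example in the header that follows.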

View File

@ -0,0 +1,28 @@
#pragma once
#include <Interpreters/InDepthNodeVisitor.h>
#include <Interpreters/Aliases.h>
namespace DB
{
class ASTSelectQuery;
struct TableWithColumnNamesAndTypes;
/** Replaces tuple comparisons with multiple comparisons.
*
* Example: SELECT id FROM test_table WHERE (id, value) = (1, 'Value');
* Result: SELECT id FROM test_table WHERE id = 1 AND value = 'Value';
*/
class ComparisonTupleEliminationMatcher
{
public:
struct Data {};
static bool needChildVisit(ASTPtr &, const ASTPtr &);
static void visit(ASTPtr & ast, Data & data);
};
using ComparisonTupleEliminationVisitor = InDepthNodeVisitor<ComparisonTupleEliminationMatcher, true>;
}

View File

@ -1918,8 +1918,13 @@ BackupsWorker & Context::getBackupsWorker() const
const bool allow_concurrent_backups = this->getConfigRef().getBool("backups.allow_concurrent_backups", true);
const bool allow_concurrent_restores = this->getConfigRef().getBool("backups.allow_concurrent_restores", true);
const auto & config = getConfigRef();
const auto & settings = getSettingsRef();
UInt64 backup_threads = config.getUInt64("backup_threads", settings.backup_threads);
UInt64 restore_threads = config.getUInt64("restore_threads", settings.restore_threads);
if (!shared->backups_worker)
shared->backups_worker.emplace(getSettingsRef().backup_threads, getSettingsRef().restore_threads, allow_concurrent_backups, allow_concurrent_restores);
shared->backups_worker.emplace(backup_threads, restore_threads, allow_concurrent_backups, allow_concurrent_restores);
return *shared->backups_worker;
}

View File

@ -415,7 +415,7 @@ InterpreterSelectQuery::InterpreterSelectQuery(
RewriteCountDistinctFunctionVisitor(data_rewrite_countdistinct).visit(query_ptr);
}
JoinedTables joined_tables(getSubqueryContext(context), getSelectQuery(), options.with_all_cols);
JoinedTables joined_tables(getSubqueryContext(context), getSelectQuery(), options.with_all_cols, options_.is_create_parameterized_view);
bool got_storage_from_query = false;
if (!has_input && !storage)
@ -636,14 +636,14 @@ InterpreterSelectQuery::InterpreterSelectQuery(
Names queried_columns = syntax_analyzer_result->requiredSourceColumns();
const auto & supported_prewhere_columns = storage->supportedPrewhereColumns();
MergeTreeWhereOptimizer{
current_info,
context,
MergeTreeWhereOptimizer where_optimizer{
std::move(column_compressed_sizes),
metadata_snapshot,
queried_columns,
supported_prewhere_columns,
log};
where_optimizer.optimize(current_info, context);
}
}
@ -2874,8 +2874,10 @@ void InterpreterSelectQuery::executeMergeSorted(QueryPlan & query_plan, const st
SortDescription sort_description = getSortDescription(query, context);
const UInt64 limit = getLimitForSorting(query, context);
const auto max_block_size = context->getSettingsRef().max_block_size;
const auto exact_rows_before_limit = context->getSettingsRef().exact_rows_before_limit;
auto merging_sorted = std::make_unique<SortingStep>(query_plan.getCurrentDataStream(), std::move(sort_description), max_block_size, limit);
auto merging_sorted = std::make_unique<SortingStep>(
query_plan.getCurrentDataStream(), std::move(sort_description), max_block_size, limit, exact_rows_before_limit);
merging_sorted->setStepDescription("Merge sorted streams " + description);
query_plan.addStep(std::move(merging_sorted));
}

View File

@ -262,12 +262,12 @@ Block InterpreterSelectWithUnionQuery::getSampleBlock(const ASTPtr & query_ptr_,
{
if (!context_->hasQueryContext())
{
SelectQueryOptions options;
if (is_subquery)
return InterpreterSelectWithUnionQuery(query_ptr_, context_, SelectQueryOptions().subquery().analyze()).getSampleBlock();
else if (is_create_parameterized_view)
return InterpreterSelectWithUnionQuery(query_ptr_, context_, SelectQueryOptions().createParameterizedView().analyze()).getSampleBlock();
else
return InterpreterSelectWithUnionQuery(query_ptr_, context_, SelectQueryOptions().analyze()).getSampleBlock();
options = options.subquery();
if (is_create_parameterized_view)
options = options.createParameterizedView();
return InterpreterSelectWithUnionQuery(query_ptr_, context_, std::move(options.analyze())).getSampleBlock();
}
auto & cache = context_->getSampleBlockCache();
@ -278,21 +278,12 @@ Block InterpreterSelectWithUnionQuery::getSampleBlock(const ASTPtr & query_ptr_,
return cache[key];
}
SelectQueryOptions options;
if (is_subquery)
{
return cache[key]
= InterpreterSelectWithUnionQuery(query_ptr_, context_, SelectQueryOptions().subquery().analyze()).getSampleBlock();
}
else if (is_create_parameterized_view)
{
return cache[key]
= InterpreterSelectWithUnionQuery(query_ptr_, context_, SelectQueryOptions().createParameterizedView().analyze())
.getSampleBlock();
}
else
{
return cache[key] = InterpreterSelectWithUnionQuery(query_ptr_, context_, SelectQueryOptions().analyze()).getSampleBlock();
}
options = options.subquery();
if (is_create_parameterized_view)
options = options.createParameterizedView();
return cache[key] = InterpreterSelectWithUnionQuery(query_ptr_, context_, std::move(options.analyze())).getSampleBlock();
}

View File

@ -173,13 +173,14 @@ using RenameQualifiedIdentifiersVisitor = InDepthNodeVisitor<RenameQualifiedIden
}
JoinedTables::JoinedTables(ContextPtr context_, const ASTSelectQuery & select_query_, bool include_all_columns_)
JoinedTables::JoinedTables(ContextPtr context_, const ASTSelectQuery & select_query_, bool include_all_columns_, bool is_create_parameterized_view_)
: context(context_)
, table_expressions(getTableExpressions(select_query_))
, include_all_columns(include_all_columns_)
, left_table_expression(extractTableExpression(select_query_, 0))
, left_db_and_table(getDatabaseAndTable(select_query_, 0))
, select_query(select_query_)
, is_create_parameterized_view(is_create_parameterized_view_)
{}
bool JoinedTables::isLeftTableSubquery() const
@ -239,7 +240,7 @@ bool JoinedTables::resolveTables()
const auto & settings = context->getSettingsRef();
bool include_alias_cols = include_all_columns || settings.asterisk_include_alias_columns;
bool include_materialized_cols = include_all_columns || settings.asterisk_include_materialized_columns;
tables_with_columns = getDatabaseAndTablesWithColumns(table_expressions, context, include_alias_cols, include_materialized_cols);
tables_with_columns = getDatabaseAndTablesWithColumns(table_expressions, context, include_alias_cols, include_materialized_cols, is_create_parameterized_view);
if (tables_with_columns.size() != table_expressions.size())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected tables count");

View File

@ -22,7 +22,7 @@ using StorageMetadataPtr = std::shared_ptr<const StorageInMemoryMetadata>;
class JoinedTables
{
public:
JoinedTables(ContextPtr context, const ASTSelectQuery & select_query_, bool include_all_columns_ = false);
JoinedTables(ContextPtr context, const ASTSelectQuery & select_query_, bool include_all_columns_ = false, bool is_create_parameterized_view_ = false);
void reset(const ASTSelectQuery & select_query);
@ -53,6 +53,7 @@ private:
ASTPtr left_table_expression;
std::optional<DatabaseAndTableWithAlias> left_db_and_table;
const ASTSelectQuery & select_query;
const bool is_create_parameterized_view;
};
}

View File

@ -8,6 +8,7 @@
#include <Interpreters/ArrayJoinedColumnsVisitor.h>
#include <Interpreters/CollectJoinOnKeysVisitor.h>
#include <Interpreters/ComparisonTupleEliminationVisitor.h>
#include <Interpreters/Context.h>
#include <Interpreters/ExecuteScalarSubqueriesVisitor.h>
#include <Interpreters/ExpressionActions.h> /// getSmallestColumn()
@ -1424,6 +1425,13 @@ void TreeRewriter::normalize(
if (context_->getClientInfo().query_kind != ClientInfo::QueryKind::SECONDARY_QUERY && settings.normalize_function_names)
FunctionNameNormalizer().visit(query.get());
if (settings.optimize_move_to_prewhere)
{
/// Required for PREWHERE
ComparisonTupleEliminationVisitor::Data data_comparison_tuple_elimination;
ComparisonTupleEliminationVisitor(data_comparison_tuple_elimination).visit(query);
}
/// Common subexpression elimination. Rewrite rules.
QueryNormalizer::Data normalizer_data(aliases, source_columns_set, ignore_alias, settings, allow_self_aliases, is_create_parameterized_view);
QueryNormalizer(normalizer_data).visit(query);

View File

@ -73,18 +73,21 @@ ASTPtr extractTableExpression(const ASTSelectQuery & select, size_t table_number
return nullptr;
}
/// The parameter is_create_parameterized_view is used in getSampleBlock of the subquery.
/// If it is set to true, then query parameters are allowed in the subquery, and that expression is not evaluated.
static NamesAndTypesList getColumnsFromTableExpression(
const ASTTableExpression & table_expression,
ContextPtr context,
NamesAndTypesList & materialized,
NamesAndTypesList & aliases,
NamesAndTypesList & virtuals)
NamesAndTypesList & virtuals,
bool is_create_parameterized_view)
{
NamesAndTypesList names_and_type_list;
if (table_expression.subquery)
{
const auto & subquery = table_expression.subquery->children.at(0);
names_and_type_list = InterpreterSelectWithUnionQuery::getSampleBlock(subquery, context, true).getNamesAndTypesList();
names_and_type_list = InterpreterSelectWithUnionQuery::getSampleBlock(subquery, context, true, is_create_parameterized_view).getNamesAndTypesList();
}
else if (table_expression.table_function)
{
@ -117,7 +120,8 @@ TablesWithColumns getDatabaseAndTablesWithColumns(
const ASTTableExprConstPtrs & table_expressions,
ContextPtr context,
bool include_alias_cols,
bool include_materialized_cols)
bool include_materialized_cols,
bool is_create_parameterized_view)
{
TablesWithColumns tables_with_columns;
@ -129,7 +133,7 @@ TablesWithColumns getDatabaseAndTablesWithColumns(
NamesAndTypesList aliases;
NamesAndTypesList virtuals;
NamesAndTypesList names_and_types = getColumnsFromTableExpression(
*table_expression, context, materialized, aliases, virtuals);
*table_expression, context, materialized, aliases, virtuals, is_create_parameterized_view);
removeDuplicateColumns(names_and_types);

View File

@ -20,7 +20,9 @@ const ASTTableExpression * getTableExpression(const ASTSelectQuery & select, siz
ASTPtr extractTableExpression(const ASTSelectQuery & select, size_t table_number);
/// The parameter is_create_parameterized_view is used in getSampleBlock of the subquery. It is forwarded to getColumnsFromTableExpression.
/// If it is set to true, then query parameters are allowed in the subquery, and that expression is not evaluated.
TablesWithColumns getDatabaseAndTablesWithColumns(
const ASTTableExprConstPtrs & table_expressions, ContextPtr context, bool include_alias_cols, bool include_materialized_cols);
const ASTTableExprConstPtrs & table_expressions, ContextPtr context, bool include_alias_cols, bool include_materialized_cols, bool is_create_parameterized_view = false);
}

View File

@ -119,7 +119,6 @@ ASTPtr ASTGrantQuery::clone() const
void ASTGrantQuery::formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const
{
settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << (attach_mode ? "ATTACH " : "")
<< (settings.hilite ? hilite_keyword : "") << ((!is_revoke && (replace_access || replace_granted_roles)) ? "REPLACE " : "") << (settings.hilite ? hilite_none : "")
<< (settings.hilite ? hilite_keyword : "") << (is_revoke ? "REVOKE" : "GRANT")
<< (settings.hilite ? IAST::hilite_none : "");
@ -161,6 +160,9 @@ void ASTGrantQuery::formatImpl(const FormatSettings & settings, FormatState &, F
settings.ostr << (settings.hilite ? hilite_keyword : "") << " WITH GRANT OPTION" << (settings.hilite ? hilite_none : "");
else if (admin_option)
settings.ostr << (settings.hilite ? hilite_keyword : "") << " WITH ADMIN OPTION" << (settings.hilite ? hilite_none : "");
if (replace_access || replace_granted_roles)
settings.ostr << (settings.hilite ? hilite_keyword : "") << " WITH REPLACE OPTION" << (settings.hilite ? hilite_none : "");
}
}

View File

@ -2255,6 +2255,7 @@ std::vector<std::pair<const char *, Operator>> ParserExpressionImpl::operators_t
{"ILIKE", Operator("ilike", 8, 2)},
{"NOT LIKE", Operator("notLike", 8, 2)},
{"NOT ILIKE", Operator("notILike", 8, 2)},
{"REGEXP", Operator("match", 8, 2)},
{"IN", Operator("in", 8, 2)},
{"NOT IN", Operator("notIn", 8, 2)},
{"GLOBAL IN", Operator("globalIn", 8, 2)},

View File

@ -10,6 +10,7 @@
#include <Analyzer/TableFunctionNode.h>
#include <Planner/PlannerContext.h>
#include <Planner/PlannerActionsVisitor.h>
namespace DB
{
@ -17,6 +18,7 @@ namespace DB
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
extern const int ILLEGAL_PREWHERE;
}
namespace
@ -78,23 +80,128 @@ public:
static bool needChildVisit(const QueryTreeNodePtr &, const QueryTreeNodePtr & child_node)
{
return !(child_node->getNodeType() == QueryTreeNodeType::QUERY || child_node->getNodeType() == QueryTreeNodeType::UNION);
auto child_node_type = child_node->getNodeType();
return !(child_node_type == QueryTreeNodeType::QUERY || child_node_type == QueryTreeNodeType::UNION);
}
private:
PlannerContext & planner_context;
};
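/// Visitor over the PREWHERE expression: finds the single table (or table function) expression its columns come from
/// and validates that the storage, and each referenced column, supports PREWHERE.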
class CollectPrewhereTableExpressionVisitor : public ConstInDepthQueryTreeVisitor<CollectPrewhereTableExpressionVisitor>
{
public:
explicit CollectPrewhereTableExpressionVisitor(const QueryTreeNodePtr & query_node_)
: query_node(query_node_)
{}
const QueryTreeNodePtr & getPrewhereTableExpression() const
{
return table_expression;
}
void visitImpl(const QueryTreeNodePtr & node)
{
auto * column_node = node->as<ColumnNode>();
if (!column_node)
return;
auto column_source = column_node->getColumnSourceOrNull();
if (!column_source)
throw Exception(ErrorCodes::ILLEGAL_PREWHERE,
"Invalid column {} in PREWHERE. In query {}",
column_node->formatASTForErrorMessage(),
query_node->formatASTForErrorMessage());
auto * table_column_source = column_source->as<TableNode>();
auto * table_function_column_source = column_source->as<TableFunctionNode>();
if (!table_column_source && !table_function_column_source)
throw Exception(ErrorCodes::ILLEGAL_PREWHERE,
"Invalid column {} in PREWHERE. Expected column source to be table or table function. Actual {}. In query {}",
column_node->formatASTForErrorMessage(),
column_source->formatASTForErrorMessage(),
query_node->formatASTForErrorMessage());
if (table_expression && table_expression.get() != column_source.get())
throw Exception(ErrorCodes::ILLEGAL_PREWHERE,
"Invalid column {} in PREWHERE. Expected columns from single table or table function {}. Actual {}. In query {}",
column_node->formatASTForErrorMessage(),
table_expression->formatASTForErrorMessage(),
column_source->formatASTForErrorMessage(),
query_node->formatASTForErrorMessage());
if (!table_expression)
{
const auto & storage = table_column_source ? table_column_source->getStorage() : table_function_column_source->getStorage();
if (!storage->supportsPrewhere())
throw Exception(ErrorCodes::ILLEGAL_PREWHERE,
"Storage {} (table {}) does not support PREWHERE",
storage->getName(),
storage->getStorageID().getNameForLogs());
table_expression = std::move(column_source);
table_supported_prewhere_columns = storage->supportedPrewhereColumns();
}
if (table_supported_prewhere_columns && !table_supported_prewhere_columns->contains(column_node->getColumnName()))
throw Exception(ErrorCodes::ILLEGAL_PREWHERE,
"Table expression {} does not support column {} in PREWHERE. In query {}",
table_expression->formatASTForErrorMessage(),
column_node->formatASTForErrorMessage(),
query_node->formatASTForErrorMessage());
}
static bool needChildVisit(const QueryTreeNodePtr &, const QueryTreeNodePtr & child_node)
{
auto child_node_type = child_node->getNodeType();
return !(child_node_type == QueryTreeNodeType::QUERY || child_node_type == QueryTreeNodeType::UNION);
}
private:
QueryTreeNodePtr query_node;
QueryTreeNodePtr table_expression;
std::optional<NameSet> table_supported_prewhere_columns;
};
void checkStorageSupportPrewhere(const QueryTreeNodePtr & table_expression)
{
if (auto * table_node = table_expression->as<TableNode>())
{
auto storage = table_node->getStorage();
if (!storage->supportsPrewhere())
throw Exception(ErrorCodes::ILLEGAL_PREWHERE,
"Storage {} (table {}) does not support PREWHERE",
storage->getName(),
storage->getStorageID().getNameForLogs());
}
else if (auto * table_function_node = table_expression->as<TableFunctionNode>())
{
auto storage = table_function_node->getStorage();
if (!storage->supportsPrewhere())
throw Exception(ErrorCodes::ILLEGAL_PREWHERE,
"Table function storage {} (table {}) does not support PREWHERE",
storage->getName(),
storage->getStorageID().getNameForLogs());
}
else
{
throw Exception(ErrorCodes::ILLEGAL_PREWHERE,
"Subquery {} does not support PREWHERE",
table_expression->formatASTForErrorMessage());
}
}
void collectTableExpressionData(QueryTreeNodePtr & query_node, PlannerContext & planner_context)
}
void collectTableExpressionData(QueryTreeNodePtr & query_node, PlannerContextPtr & planner_context)
{
auto & query_node_typed = query_node->as<QueryNode &>();
auto table_expressions_nodes = extractTableExpressions(query_node_typed.getJoinTree());
for (auto & table_expression_node : table_expressions_nodes)
{
auto & table_expression_data = planner_context.getOrCreateTableExpressionData(table_expression_node);
auto & table_expression_data = planner_context->getOrCreateTableExpressionData(table_expression_node);
if (auto * table_node = table_expression_node->as<TableNode>())
{
@ -108,8 +215,60 @@ void collectTableExpressionData(QueryTreeNodePtr & query_node, PlannerContext &
}
}
CollectSourceColumnsVisitor collect_source_columns_visitor(planner_context);
collect_source_columns_visitor.visit(query_node);
CollectSourceColumnsVisitor collect_source_columns_visitor(*planner_context);
for (auto & node : query_node_typed.getChildren())
{
if (!node || node == query_node_typed.getPrewhere())
continue;
auto node_type = node->getNodeType();
if (node_type == QueryTreeNodeType::QUERY || node_type == QueryTreeNodeType::UNION)
continue;
collect_source_columns_visitor.visit(node);
}
if (query_node_typed.hasPrewhere())
{
CollectPrewhereTableExpressionVisitor collect_prewhere_table_expression_visitor(query_node);
collect_prewhere_table_expression_visitor.visit(query_node_typed.getPrewhere());
auto prewhere_table_expression = collect_prewhere_table_expression_visitor.getPrewhereTableExpression();
if (!prewhere_table_expression)
{
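/// PREWHERE referenced no columns directly; fall back to the first table expression of the join tree
/// and verify that it supports PREWHERE.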
prewhere_table_expression = table_expressions_nodes[0];
checkStorageSupportPrewhere(prewhere_table_expression);
}
auto & table_expression_data = planner_context->getOrCreateTableExpressionData(prewhere_table_expression);
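/// Remember which columns were required before PREWHERE is visited, so that PREWHERE inputs
/// that are also needed elsewhere can be kept as outputs of the filter dag below.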
const auto & column_names = table_expression_data.getColumnNames();
NameSet required_column_names_without_prewhere(column_names.begin(), column_names.end());
collect_source_columns_visitor.visit(query_node_typed.getPrewhere());
auto prewhere_actions_dag = std::make_shared<ActionsDAG>();
PlannerActionsVisitor visitor(planner_context, false /*use_column_identifier_as_action_node_name*/);
auto expression_nodes = visitor.visit(prewhere_actions_dag, query_node_typed.getPrewhere());
if (expression_nodes.size() != 1)
throw Exception(ErrorCodes::ILLEGAL_PREWHERE,
"Invalid PREWHERE. Expected single boolean expression. In query {}",
query_node->formatASTForErrorMessage());
prewhere_actions_dag->getOutputs().push_back(expression_nodes[0]);
for (const auto & prewhere_input_node : prewhere_actions_dag->getInputs())
if (required_column_names_without_prewhere.contains(prewhere_input_node->result_name))
prewhere_actions_dag->getOutputs().push_back(prewhere_input_node);
table_expression_data.setPrewhereFilterActions(std::move(prewhere_actions_dag));
}
}
void collectSourceColumns(QueryTreeNodePtr & expression_node, PlannerContextPtr & planner_context)
{
CollectSourceColumnsVisitor collect_source_columns_visitor(*planner_context);
collect_source_columns_visitor.visit(expression_node);
}
}

View File

@ -12,6 +12,13 @@ namespace DB
*
* ALIAS table column nodes are registered in table expression data and replaced in query tree with inner alias expression.
*/
void collectTableExpressionData(QueryTreeNodePtr & query_node, PlannerContext & planner_context);
void collectTableExpressionData(QueryTreeNodePtr & query_node, PlannerContextPtr & planner_context);
/** Collect source columns for expression node.
* Collected source columns are registered in planner context.
*
* ALIAS table column nodes are registered in table expression data and replaced in query tree with inner alias expression.
*/
void collectSourceColumns(QueryTreeNodePtr & expression_node, PlannerContextPtr & planner_context);
}

View File

@ -79,26 +79,14 @@ namespace ErrorCodes
extern const int BAD_ARGUMENTS;
extern const int TOO_DEEP_SUBQUERIES;
extern const int NOT_IMPLEMENTED;
extern const int ILLEGAL_PREWHERE;
}
/** ClickHouse query planner.
*
* TODO: Support JOIN with JOIN engine.
* TODO: Support VIEWs.
* TODO: JOIN drop unnecessary columns after ON, USING section
* TODO: Support RBAC. Support RBAC for ALIAS columns
* TODO: Support PREWHERE
* TODO: Support DISTINCT
* TODO: Support trivial count optimization
* TODO: Support projections
* TODO: Support read in order optimization
* TODO: UNION storage limits
* TODO: Support max streams
* TODO: Support ORDER BY read in order optimization
* TODO: Support GROUP BY read in order optimization
* TODO: Support Key Condition. Support indexes for IN function.
* TODO: Better support for quota and limits.
* TODO: Support projections.
* TODO: Support trivial count using partition predicates.
* TODO: Support trivial count for table functions.
* TODO: Support indexes for IN function.
*/
namespace
@ -135,37 +123,6 @@ void checkStoragesSupportTransactions(const PlannerContextPtr & planner_context)
}
}
void checkStorageSupportPrewhere(const QueryTreeNodePtr & query_node)
{
auto & query_node_typed = query_node->as<QueryNode &>();
auto table_expression = extractLeftTableExpression(query_node_typed.getJoinTree());
if (auto * table_node = table_expression->as<TableNode>())
{
auto storage = table_node->getStorage();
if (!storage->supportsPrewhere())
throw Exception(ErrorCodes::ILLEGAL_PREWHERE,
"Storage {} (table {}) does not support PREWHERE",
storage->getName(),
storage->getStorageID().getNameForLogs());
}
else if (auto * table_function_node = table_expression->as<TableFunctionNode>())
{
auto storage = table_function_node->getStorage();
if (!storage->supportsPrewhere())
throw Exception(ErrorCodes::ILLEGAL_PREWHERE,
"Table function storage {} (table {}) does not support PREWHERE",
storage->getName(),
storage->getStorageID().getNameForLogs());
}
else
{
throw Exception(ErrorCodes::ILLEGAL_PREWHERE,
"Subquery {} does not support PREWHERE",
query_node->formatASTForErrorMessage());
}
}
/// Extend lifetime of query context, storages, and table locks
void extendQueryContextAndStoragesLifetime(QueryPlan & query_plan, const PlannerContextPtr & planner_context)
{
@ -568,7 +525,8 @@ void addMergeSortingStep(QueryPlan & query_plan,
auto merging_sorted = std::make_unique<SortingStep>(query_plan.getCurrentDataStream(),
sort_description,
max_block_size,
query_analysis_result.partial_sorting_limit);
query_analysis_result.partial_sorting_limit,
settings.exact_rows_before_limit);
merging_sorted->setStepDescription("Merge sorted streams " + description);
query_plan.addStep(std::move(merging_sorted));
}
@ -1140,18 +1098,6 @@ void Planner::buildPlanForQueryNode()
auto & query_node = query_tree->as<QueryNode &>();
const auto & query_context = planner_context->getQueryContext();
if (query_node.hasPrewhere())
{
checkStorageSupportPrewhere(query_tree);
if (query_node.hasWhere())
query_node.getWhere() = mergeConditionNodes({query_node.getPrewhere(), query_node.getWhere()}, query_context);
else
query_node.getWhere() = query_node.getPrewhere();
query_node.getPrewhere() = {};
}
if (query_node.hasWhere())
{
auto condition_constant = tryExtractConstantFromConditionNode(query_node.getWhere());
@ -1185,8 +1131,8 @@ void Planner::buildPlanForQueryNode()
}
checkStoragesSupportTransactions(planner_context);
collectTableExpressionData(query_tree, *planner_context);
collectSets(query_tree, *planner_context);
collectTableExpressionData(query_tree, planner_context);
auto top_level_identifiers = collectTopLevelColumnIdentifiers(query_tree, planner_context);
auto join_tree_query_plan = buildJoinTreeQueryPlan(query_tree,
@ -1215,6 +1161,12 @@ void Planner::buildPlanForQueryNode()
std::vector<ActionsDAGPtr> result_actions_to_execute;
for (auto & [_, table_expression_data] : planner_context->getTableExpressionNodeToData())
{
if (table_expression_data.getPrewhereFilterActions())
result_actions_to_execute.push_back(table_expression_data.getPrewhereFilterActions());
}
if (query_processing_info.isIntermediateStage())
{
addPreliminarySortOrDistinctOrLimitStepsIfNeeded(query_plan,

View File

@ -44,6 +44,264 @@ namespace ErrorCodes
namespace
{
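/// Helper that computes action dag node names for query tree nodes. When use_column_identifier_as_action_node_name
/// is false, the raw column name is used instead of the planner column identifier (as done for the PREWHERE filter dag).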
class ActionNodeNameHelper
{
public:
ActionNodeNameHelper(QueryTreeNodeToName & node_to_name_,
const PlannerContext & planner_context_,
bool use_column_identifier_as_action_node_name_)
: node_to_name(node_to_name_)
, planner_context(planner_context_)
, use_column_identifier_as_action_node_name(use_column_identifier_as_action_node_name_)
{
}
String calculateActionNodeName(const QueryTreeNodePtr & node)
{
auto it = node_to_name.find(node);
if (it != node_to_name.end())
return it->second;
String result;
auto node_type = node->getNodeType();
switch (node_type)
{
case QueryTreeNodeType::COLUMN:
{
const ColumnIdentifier * column_identifier = nullptr;
if (use_column_identifier_as_action_node_name)
column_identifier = planner_context.getColumnNodeIdentifierOrNull(node);
if (column_identifier)
{
result = *column_identifier;
}
else
{
const auto & column_node = node->as<ColumnNode &>();
result = column_node.getColumnName();
}
break;
}
case QueryTreeNodeType::CONSTANT:
{
const auto & constant_node = node->as<ConstantNode &>();
result = calculateConstantActionNodeName(constant_node.getValue(), constant_node.getResultType());
break;
}
case QueryTreeNodeType::FUNCTION:
{
const auto & function_node = node->as<FunctionNode &>();
String in_function_second_argument_node_name;
if (isNameOfInFunction(function_node.getFunctionName()))
{
const auto & in_second_argument_node = function_node.getArguments().getNodes().at(1);
in_function_second_argument_node_name = planner_context.createSetKey(in_second_argument_node);
}
WriteBufferFromOwnString buffer;
buffer << function_node.getFunctionName();
const auto & function_parameters_nodes = function_node.getParameters().getNodes();
if (!function_parameters_nodes.empty())
{
buffer << '(';
size_t function_parameters_nodes_size = function_parameters_nodes.size();
for (size_t i = 0; i < function_parameters_nodes_size; ++i)
{
const auto & function_parameter_node = function_parameters_nodes[i];
buffer << calculateActionNodeName(function_parameter_node);
if (i + 1 != function_parameters_nodes_size)
buffer << ", ";
}
buffer << ')';
}
const auto & function_arguments_nodes = function_node.getArguments().getNodes();
String function_argument_name;
buffer << '(';
size_t function_arguments_nodes_size = function_arguments_nodes.size();
for (size_t i = 0; i < function_arguments_nodes_size; ++i)
{
if (i == 1 && !in_function_second_argument_node_name.empty())
{
function_argument_name = in_function_second_argument_node_name;
}
else
{
const auto & function_argument_node = function_arguments_nodes[i];
function_argument_name = calculateActionNodeName(function_argument_node);
}
buffer << function_argument_name;
if (i + 1 != function_arguments_nodes_size)
buffer << ", ";
}
buffer << ')';
if (function_node.isWindowFunction())
{
buffer << " OVER (";
buffer << calculateWindowNodeActionName(function_node.getWindowNode());
buffer << ')';
}
result = buffer.str();
break;
}
case QueryTreeNodeType::LAMBDA:
{
auto lambda_hash = node->getTreeHash();
result = "__lambda_" + toString(lambda_hash.first) + '_' + toString(lambda_hash.second);
break;
}
default:
{
throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid action query tree node {}", node->formatASTForErrorMessage());
}
}
node_to_name.emplace(node, result);
return result;
}
static String calculateConstantActionNodeName(const Field & constant_literal, const DataTypePtr & constant_type)
{
auto constant_name = applyVisitor(FieldVisitorToString(), constant_literal);
return constant_name + "_" + constant_type->getName();
}
static String calculateConstantActionNodeName(const Field & constant_literal)
{
return calculateConstantActionNodeName(constant_literal, applyVisitor(FieldToDataType(), constant_literal));
}
String calculateWindowNodeActionName(const QueryTreeNodePtr & node)
{
auto & window_node = node->as<WindowNode &>();
WriteBufferFromOwnString buffer;
if (window_node.hasPartitionBy())
{
buffer << "PARTITION BY ";
auto & partition_by_nodes = window_node.getPartitionBy().getNodes();
size_t partition_by_nodes_size = partition_by_nodes.size();
for (size_t i = 0; i < partition_by_nodes_size; ++i)
{
auto & partition_by_node = partition_by_nodes[i];
buffer << calculateActionNodeName(partition_by_node);
if (i + 1 != partition_by_nodes_size)
buffer << ", ";
}
}
if (window_node.hasOrderBy())
{
if (window_node.hasPartitionBy())
buffer << ' ';
buffer << "ORDER BY ";
auto & order_by_nodes = window_node.getOrderBy().getNodes();
size_t order_by_nodes_size = order_by_nodes.size();
for (size_t i = 0; i < order_by_nodes_size; ++i)
{
auto & sort_node = order_by_nodes[i]->as<SortNode &>();
buffer << calculateActionNodeName(sort_node.getExpression());
auto sort_direction = sort_node.getSortDirection();
buffer << (sort_direction == SortDirection::ASCENDING ? " ASC" : " DESC");
auto nulls_sort_direction = sort_node.getNullsSortDirection();
if (nulls_sort_direction)
buffer << " NULLS " << (nulls_sort_direction == sort_direction ? "LAST" : "FIRST");
if (auto collator = sort_node.getCollator())
buffer << " COLLATE " << collator->getLocale();
if (sort_node.withFill())
{
buffer << " WITH FILL";
if (sort_node.hasFillFrom())
buffer << " FROM " << calculateActionNodeName(sort_node.getFillFrom());
if (sort_node.hasFillTo())
buffer << " TO " << calculateActionNodeName(sort_node.getFillTo());
if (sort_node.hasFillStep())
buffer << " STEP " << calculateActionNodeName(sort_node.getFillStep());
}
if (i + 1 != order_by_nodes_size)
buffer << ", ";
}
}
auto & window_frame = window_node.getWindowFrame();
if (!window_frame.is_default)
{
if (window_node.hasPartitionBy() || window_node.hasOrderBy())
buffer << ' ';
buffer << window_frame.type << " BETWEEN ";
if (window_frame.begin_type == WindowFrame::BoundaryType::Current)
{
buffer << "CURRENT ROW";
}
else if (window_frame.begin_type == WindowFrame::BoundaryType::Unbounded)
{
buffer << "UNBOUNDED";
buffer << " " << (window_frame.begin_preceding ? "PRECEDING" : "FOLLOWING");
}
else
{
buffer << calculateActionNodeName(window_node.getFrameBeginOffsetNode());
buffer << " " << (window_frame.begin_preceding ? "PRECEDING" : "FOLLOWING");
}
buffer << " AND ";
if (window_frame.end_type == WindowFrame::BoundaryType::Current)
{
buffer << "CURRENT ROW";
}
else if (window_frame.end_type == WindowFrame::BoundaryType::Unbounded)
{
buffer << "UNBOUNDED";
buffer << " " << (window_frame.end_preceding ? "PRECEDING" : "FOLLOWING");
}
else
{
buffer << calculateActionNodeName(window_node.getFrameEndOffsetNode());
buffer << " " << (window_frame.end_preceding ? "PRECEDING" : "FOLLOWING");
}
}
return buffer.str();
}
private:
std::unordered_map<QueryTreeNodePtr, std::string> & node_to_name;
const PlannerContext & planner_context;
bool use_column_identifier_as_action_node_name = true;
};
class ActionsScopeNode
{
public:
@ -165,7 +423,9 @@ private:
class PlannerActionsVisitorImpl
{
public:
PlannerActionsVisitorImpl(ActionsDAGPtr actions_dag, const PlannerContextPtr & planner_context_);
PlannerActionsVisitorImpl(ActionsDAGPtr actions_dag,
const PlannerContextPtr & planner_context_,
bool use_column_identifier_as_action_node_name_);
ActionsDAG::NodeRawConstPtrs visit(QueryTreeNodePtr expression_node);
@ -189,10 +449,14 @@ private:
std::vector<ActionsScopeNode> actions_stack;
std::unordered_map<QueryTreeNodePtr, std::string> node_to_node_name;
const PlannerContextPtr planner_context;
ActionNodeNameHelper action_node_name_helper;
};
PlannerActionsVisitorImpl::PlannerActionsVisitorImpl(ActionsDAGPtr actions_dag, const PlannerContextPtr & planner_context_)
PlannerActionsVisitorImpl::PlannerActionsVisitorImpl(ActionsDAGPtr actions_dag,
const PlannerContextPtr & planner_context_,
bool use_column_identifier_as_action_node_name_)
: planner_context(planner_context_)
, action_node_name_helper(node_to_node_name, *planner_context, use_column_identifier_as_action_node_name_)
{
actions_stack.emplace_back(std::move(actions_dag), nullptr);
}
@ -236,7 +500,7 @@ PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::vi
PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::visitColumn(const QueryTreeNodePtr & node)
{
auto column_node_name = calculateActionNodeName(node, *planner_context, node_to_node_name);
auto column_node_name = action_node_name_helper.calculateActionNodeName(node);
const auto & column_node = node->as<ColumnNode &>();
Int64 actions_stack_size = static_cast<Int64>(actions_stack.size() - 1);
@ -386,7 +650,7 @@ PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::ma
PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::visitIndexHintFunction(const QueryTreeNodePtr & node)
{
const auto & function_node = node->as<FunctionNode &>();
auto function_node_name = calculateActionNodeName(node, *planner_context, node_to_node_name);
auto function_node_name = action_node_name_helper.calculateActionNodeName(node);
auto index_hint_actions_dag = std::make_shared<ActionsDAG>();
auto & index_hint_actions_dag_outputs = index_hint_actions_dag->getOutputs();
@ -428,7 +692,7 @@ PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::vi
if (isNameOfInFunction(function_node.getFunctionName()))
in_function_second_argument_node_name_with_level = makeSetForInFunction(node);
auto function_node_name = calculateActionNodeName(node, *planner_context, node_to_node_name);
auto function_node_name = action_node_name_helper.calculateActionNodeName(node);
/* Aggregate functions, window functions, and GROUP BY expressions were already analyzed in the previous steps.
* If we have already visited some expression, we don't need to revisit it or its arguments again.
@ -516,266 +780,57 @@ PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::vi
}
PlannerActionsVisitor::PlannerActionsVisitor(const PlannerContextPtr & planner_context_)
PlannerActionsVisitor::PlannerActionsVisitor(const PlannerContextPtr & planner_context_, bool use_column_identifier_as_action_node_name_)
: planner_context(planner_context_)
, use_column_identifier_as_action_node_name(use_column_identifier_as_action_node_name_)
{}
ActionsDAG::NodeRawConstPtrs PlannerActionsVisitor::visit(ActionsDAGPtr actions_dag, QueryTreeNodePtr expression_node)
{
PlannerActionsVisitorImpl actions_visitor_impl(actions_dag, planner_context);
PlannerActionsVisitorImpl actions_visitor_impl(actions_dag, planner_context, use_column_identifier_as_action_node_name);
return actions_visitor_impl.visit(expression_node);
}
String calculateActionNodeName(const QueryTreeNodePtr & node, const PlannerContext & planner_context, QueryTreeNodeToName & node_to_name)
String calculateActionNodeName(const QueryTreeNodePtr & node,
const PlannerContext & planner_context,
QueryTreeNodeToName & node_to_name,
bool use_column_identifier_as_action_node_name)
{
auto it = node_to_name.find(node);
if (it != node_to_name.end())
return it->second;
String result;
auto node_type = node->getNodeType();
switch (node_type)
{
case QueryTreeNodeType::COLUMN:
{
const auto * column_identifier = planner_context.getColumnNodeIdentifierOrNull(node);
if (column_identifier)
{
result = *column_identifier;
}
else
{
const auto & column_node = node->as<ColumnNode &>();
result = column_node.getColumnName();
}
break;
}
case QueryTreeNodeType::CONSTANT:
{
const auto & constant_node = node->as<ConstantNode &>();
result = calculateConstantActionNodeName(constant_node.getValue(), constant_node.getResultType());
break;
}
case QueryTreeNodeType::FUNCTION:
{
const auto & function_node = node->as<FunctionNode &>();
String in_function_second_argument_node_name;
if (isNameOfInFunction(function_node.getFunctionName()))
{
const auto & in_second_argument_node = function_node.getArguments().getNodes().at(1);
in_function_second_argument_node_name = planner_context.createSetKey(in_second_argument_node);
}
WriteBufferFromOwnString buffer;
buffer << function_node.getFunctionName();
const auto & function_parameters_nodes = function_node.getParameters().getNodes();
if (!function_parameters_nodes.empty())
{
buffer << '(';
size_t function_parameters_nodes_size = function_parameters_nodes.size();
for (size_t i = 0; i < function_parameters_nodes_size; ++i)
{
const auto & function_parameter_node = function_parameters_nodes[i];
buffer << calculateActionNodeName(function_parameter_node, planner_context, node_to_name);
if (i + 1 != function_parameters_nodes_size)
buffer << ", ";
}
buffer << ')';
}
const auto & function_arguments_nodes = function_node.getArguments().getNodes();
String function_argument_name;
buffer << '(';
size_t function_arguments_nodes_size = function_arguments_nodes.size();
for (size_t i = 0; i < function_arguments_nodes_size; ++i)
{
if (i == 1 && !in_function_second_argument_node_name.empty())
{
function_argument_name = in_function_second_argument_node_name;
}
else
{
const auto & function_argument_node = function_arguments_nodes[i];
function_argument_name = calculateActionNodeName(function_argument_node, planner_context, node_to_name);
}
buffer << function_argument_name;
if (i + 1 != function_arguments_nodes_size)
buffer << ", ";
}
buffer << ')';
if (function_node.isWindowFunction())
{
buffer << " OVER (";
buffer << calculateWindowNodeActionName(function_node.getWindowNode(), planner_context, node_to_name);
buffer << ')';
}
result = buffer.str();
break;
}
case QueryTreeNodeType::LAMBDA:
{
auto lambda_hash = node->getTreeHash();
result = "__lambda_" + toString(lambda_hash.first) + '_' + toString(lambda_hash.second);
break;
}
default:
{
throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid action query tree node {}", node->formatASTForErrorMessage());
}
}
node_to_name.emplace(node, result);
return result;
ActionNodeNameHelper helper(node_to_name, planner_context, use_column_identifier_as_action_node_name);
return helper.calculateActionNodeName(node);
}
String calculateActionNodeName(const QueryTreeNodePtr & node, const PlannerContext & planner_context)
String calculateActionNodeName(const QueryTreeNodePtr & node, const PlannerContext & planner_context, bool use_column_identifier_as_action_node_name)
{
QueryTreeNodeToName empty_map;
return calculateActionNodeName(node, planner_context, empty_map);
ActionNodeNameHelper helper(empty_map, planner_context, use_column_identifier_as_action_node_name);
return helper.calculateActionNodeName(node);
}
String calculateConstantActionNodeName(const Field & constant_literal, const DataTypePtr & constant_type)
{
auto constant_name = applyVisitor(FieldVisitorToString(), constant_literal);
return constant_name + "_" + constant_type->getName();
return ActionNodeNameHelper::calculateConstantActionNodeName(constant_literal, constant_type);
}
String calculateConstantActionNodeName(const Field & constant_literal)
{
return calculateConstantActionNodeName(constant_literal, applyVisitor(FieldToDataType(), constant_literal));
return ActionNodeNameHelper::calculateConstantActionNodeName(constant_literal);
}
String calculateWindowNodeActionName(const QueryTreeNodePtr & node, const PlannerContext & planner_context, QueryTreeNodeToName & node_to_name)
String calculateWindowNodeActionName(const QueryTreeNodePtr & node,
const PlannerContext & planner_context,
QueryTreeNodeToName & node_to_name,
bool use_column_identifier_as_action_node_name)
{
auto & window_node = node->as<WindowNode &>();
WriteBufferFromOwnString buffer;
if (window_node.hasPartitionBy())
{
buffer << "PARTITION BY ";
auto & partition_by_nodes = window_node.getPartitionBy().getNodes();
size_t partition_by_nodes_size = partition_by_nodes.size();
for (size_t i = 0; i < partition_by_nodes_size; ++i)
{
auto & partition_by_node = partition_by_nodes[i];
buffer << calculateActionNodeName(partition_by_node, planner_context, node_to_name);
if (i + 1 != partition_by_nodes_size)
buffer << ", ";
}
}
if (window_node.hasOrderBy())
{
if (window_node.hasPartitionBy())
buffer << ' ';
buffer << "ORDER BY ";
auto & order_by_nodes = window_node.getOrderBy().getNodes();
size_t order_by_nodes_size = order_by_nodes.size();
for (size_t i = 0; i < order_by_nodes_size; ++i)
{
auto & sort_node = order_by_nodes[i]->as<SortNode &>();
buffer << calculateActionNodeName(sort_node.getExpression(), planner_context, node_to_name);
auto sort_direction = sort_node.getSortDirection();
buffer << (sort_direction == SortDirection::ASCENDING ? " ASC" : " DESC");
auto nulls_sort_direction = sort_node.getNullsSortDirection();
if (nulls_sort_direction)
buffer << " NULLS " << (nulls_sort_direction == sort_direction ? "LAST" : "FIRST");
if (auto collator = sort_node.getCollator())
buffer << " COLLATE " << collator->getLocale();
if (sort_node.withFill())
{
buffer << " WITH FILL";
if (sort_node.hasFillFrom())
buffer << " FROM " << calculateActionNodeName(sort_node.getFillFrom(), planner_context, node_to_name);
if (sort_node.hasFillTo())
buffer << " TO " << calculateActionNodeName(sort_node.getFillTo(), planner_context, node_to_name);
if (sort_node.hasFillStep())
buffer << " STEP " << calculateActionNodeName(sort_node.getFillStep(), planner_context, node_to_name);
}
if (i + 1 != order_by_nodes_size)
buffer << ", ";
}
}
auto & window_frame = window_node.getWindowFrame();
if (!window_frame.is_default)
{
if (window_node.hasPartitionBy() || window_node.hasOrderBy())
buffer << ' ';
buffer << window_frame.type << " BETWEEN ";
if (window_frame.begin_type == WindowFrame::BoundaryType::Current)
{
buffer << "CURRENT ROW";
}
else if (window_frame.begin_type == WindowFrame::BoundaryType::Unbounded)
{
buffer << "UNBOUNDED";
buffer << " " << (window_frame.begin_preceding ? "PRECEDING" : "FOLLOWING");
}
else
{
buffer << calculateActionNodeName(window_node.getFrameBeginOffsetNode(), planner_context, node_to_name);
buffer << " " << (window_frame.begin_preceding ? "PRECEDING" : "FOLLOWING");
}
buffer << " AND ";
if (window_frame.end_type == WindowFrame::BoundaryType::Current)
{
buffer << "CURRENT ROW";
}
else if (window_frame.end_type == WindowFrame::BoundaryType::Unbounded)
{
buffer << "UNBOUNDED";
buffer << " " << (window_frame.end_preceding ? "PRECEDING" : "FOLLOWING");
}
else
{
buffer << calculateActionNodeName(window_node.getFrameEndOffsetNode(), planner_context, node_to_name);
buffer << " " << (window_frame.end_preceding ? "PRECEDING" : "FOLLOWING");
}
}
return buffer.str();
ActionNodeNameHelper helper(node_to_name, planner_context, use_column_identifier_as_action_node_name);
return helper.calculateWindowNodeActionName(node);
}
String calculateWindowNodeActionName(const QueryTreeNodePtr & node, const PlannerContext & planner_context)
String calculateWindowNodeActionName(const QueryTreeNodePtr & node, const PlannerContext & planner_context, bool use_column_identifier_as_action_node_name)
{
QueryTreeNodeToName empty_map;
return calculateWindowNodeActionName(node, planner_context, empty_map);
ActionNodeNameHelper helper(empty_map, planner_context, use_column_identifier_as_action_node_name);
return helper.calculateWindowNodeActionName(node);
}
}

View File

@ -23,7 +23,7 @@ using PlannerContextPtr = std::shared_ptr<PlannerContext>;
* Preconditions:
* 1. Table expression data for table expression nodes is collected in planner context.
* For a column node that has a column table expression source, the identifier for the column name in table expression data
* is used as the action dag node name.
* is used as the action dag node name, if use_column_identifier_as_action_node_name = true.
* 2. Sets for IN functions are already collected in planner context.
*
* During actions build, there is special handling for following functions:
@ -33,7 +33,7 @@ using PlannerContextPtr = std::shared_ptr<PlannerContext>;
class PlannerActionsVisitor
{
public:
explicit PlannerActionsVisitor(const PlannerContextPtr & planner_context_);
explicit PlannerActionsVisitor(const PlannerContextPtr & planner_context_, bool use_column_identifier_as_action_node_name_ = true);
/** Add actions necessary to calculate expression node into expression dag.
* Necessary actions are not added in actions dag output.
@ -43,21 +43,27 @@ public:
private:
const PlannerContextPtr planner_context;
bool use_column_identifier_as_action_node_name = true;
};
/** Calculate the query tree expression node action dag name and add it into the node-to-name map.
* If the node already exists in the map, the name from the map is used.
*
* For a column node, the column node identifier from the planner context is used.
* For a column node, the column node identifier from the planner context is used, if use_column_identifier_as_action_node_name = true.
*/
using QueryTreeNodeToName = std::unordered_map<QueryTreeNodePtr, String>;
String calculateActionNodeName(const QueryTreeNodePtr & node, const PlannerContext & planner_context, QueryTreeNodeToName & node_to_name);
String calculateActionNodeName(const QueryTreeNodePtr & node,
const PlannerContext & planner_context,
QueryTreeNodeToName & node_to_name,
bool use_column_identifier_as_action_node_name = true);
/** Calculate query tree expression node action dag name.
*
* For a column node, the column node identifier from the planner context is used.
* For a column node, the column node identifier from the planner context is used, if use_column_identifier_as_action_node_name = true.
*/
String calculateActionNodeName(const QueryTreeNodePtr & node, const PlannerContext & planner_context);
String calculateActionNodeName(const QueryTreeNodePtr & node,
const PlannerContext & planner_context,
bool use_column_identifier_as_action_node_name = true);
/// Calculate action node name for constant
String calculateConstantActionNodeName(const Field & constant_literal, const DataTypePtr & constant_type);
@ -67,12 +73,19 @@ String calculateConstantActionNodeName(const Field & constant_literal);
/** Calculate action node name for window node.
* A window node action name can only be part of a window function action name.
* For a column node, the column node identifier from the planner context is used, if use_column_identifier_as_action_node_name = true.
*/
String calculateWindowNodeActionName(const QueryTreeNodePtr & node, const PlannerContext & planner_context, QueryTreeNodeToName & node_to_name);
String calculateWindowNodeActionName(const QueryTreeNodePtr & node,
const PlannerContext & planner_context,
QueryTreeNodeToName & node_to_name,
bool use_column_identifier_as_action_node_name = true);
/** Calculate action node name for window node.
* A window node action name can only be part of a window function action name.
* For a column node, the column node identifier from the planner context is used, if use_column_identifier_as_action_node_name = true.
*/
String calculateWindowNodeActionName(const QueryTreeNodePtr & node, const PlannerContext & planner_context);
String calculateWindowNodeActionName(const QueryTreeNodePtr & node,
const PlannerContext & planner_context,
bool use_column_identifier_as_action_node_name = true);
}

Some files were not shown because too many files have changed in this diff.