From 0818092ae8d49f2e7f87fed6c8703374384719fc Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@clickhouse.com>
Date: Sun, 7 May 2023 19:45:57 +0200
Subject: [PATCH 001/242] Enable Sparse columns by default

---
 src/Storages/MergeTree/MergeTreeSettings.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Storages/MergeTree/MergeTreeSettings.h b/src/Storages/MergeTree/MergeTreeSettings.h
index 5416b77a97e..27f482d79ba 100644
--- a/src/Storages/MergeTree/MergeTreeSettings.h
+++ b/src/Storages/MergeTree/MergeTreeSettings.h
@@ -37,7 +37,7 @@ struct Settings;
     M(UInt64, min_rows_for_compact_part, 0, "Experimental. Minimal number of rows to create part in compact format instead of saving it in RAM", 0) \
     M(Bool, in_memory_parts_enable_wal, true, "Whether to write blocks in Native format to write-ahead-log before creation in-memory part", 0) \
     M(UInt64, write_ahead_log_max_bytes, 1024 * 1024 * 1024, "Rotate WAL, if it exceeds that amount of bytes", 0) \
-    M(Float, ratio_of_defaults_for_sparse_serialization, 1.0, "Minimal ratio of number of default values to number of all values in column to store it in sparse serializations. If >= 1, columns will be always written in full serialization.", 0) \
+    M(Float, ratio_of_defaults_for_sparse_serialization, 0.95, "Minimal ratio of number of default values to number of all values in column to store it in sparse serializations. If >= 1, columns will be always written in full serialization.", 0) \
     \
     /** Merge settings. */ \
     M(UInt64, merge_max_block_size, 8192, "How many rows in blocks should be formed for merge operations. By default has the same value as `index_granularity`.", 0) \

From 7ec98205b58ab36eb28b2f46348dfcfe22215a3c Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@clickhouse.com>
Date: Sun, 7 May 2023 22:54:14 +0300
Subject: [PATCH 002/242] Update MergeTreeSettings.h

---
 src/Storages/MergeTree/MergeTreeSettings.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Storages/MergeTree/MergeTreeSettings.h b/src/Storages/MergeTree/MergeTreeSettings.h
index 27f482d79ba..caac86c6706 100644
--- a/src/Storages/MergeTree/MergeTreeSettings.h
+++ b/src/Storages/MergeTree/MergeTreeSettings.h
@@ -37,7 +37,7 @@ struct Settings;
     M(UInt64, min_rows_for_compact_part, 0, "Experimental. Minimal number of rows to create part in compact format instead of saving it in RAM", 0) \
     M(Bool, in_memory_parts_enable_wal, true, "Whether to write blocks in Native format to write-ahead-log before creation in-memory part", 0) \
     M(UInt64, write_ahead_log_max_bytes, 1024 * 1024 * 1024, "Rotate WAL, if it exceeds that amount of bytes", 0) \
-    M(Float, ratio_of_defaults_for_sparse_serialization, 0.95, "Minimal ratio of number of default values to number of all values in column to store it in sparse serializations. If >= 1, columns will be always written in full serialization.", 0) \
+    M(Float, ratio_of_defaults_for_sparse_serialization, 0.9375f, "Minimal ratio of number of default values to number of all values in column to store it in sparse serializations. If >= 1, columns will be always written in full serialization.", 0) \
     \
     /** Merge settings. */ \
     M(UInt64, merge_max_block_size, 8192, "How many rows in blocks should be formed for merge operations. By default has the same value as `index_granularity`.", 0) \

From f3f6ccd7733aa4946c339b4973210f85243e44d1 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@clickhouse.com>
Date: Mon, 8 May 2023 00:28:54 +0200
Subject: [PATCH 003/242] Update tests

---
 .../0_stateless/00443_preferred_block_size_bytes.sh  |  6 +++---
 ...0484_preferred_max_column_in_block_size_bytes.sql |  8 ++++----
 .../00804_test_delta_codec_compression.sql           | 12 ++++++------
 .../0_stateless/00950_test_double_delta_codec.sql    |  2 +-
 ...00961_checksums_in_system_parts_columns_table.sql |  2 +-
 .../0_stateless/01055_compact_parts_granularity.sh   |  2 +-
 .../queries/0_stateless/01786_explain_merge_tree.sh  |  4 ++--
 tests/queries/0_stateless/02263_lazy_mark_load.sh    |  2 +-
 .../0_stateless/02293_selected_rows_and_merges.sh    |  8 +++-----
 .../0_stateless/02361_fsync_profile_events.sh        |  7 ++++---
 .../02381_compress_marks_and_primary_key.sql         |  4 ++--
 11 files changed, 28 insertions(+), 29 deletions(-)

diff --git a/tests/queries/0_stateless/00443_preferred_block_size_bytes.sh b/tests/queries/0_stateless/00443_preferred_block_size_bytes.sh
index c184b58bf53..27b9f5c00c7 100755
--- a/tests/queries/0_stateless/00443_preferred_block_size_bytes.sh
+++ b/tests/queries/0_stateless/00443_preferred_block_size_bytes.sh
@@ -8,7 +8,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 . "$CURDIR"/../shell_config.sh
 
 $CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS preferred_block_size_bytes"
-$CLICKHOUSE_CLIENT -q "CREATE TABLE preferred_block_size_bytes (p Date, s String) ENGINE = MergeTree PARTITION BY p ORDER BY p SETTINGS index_granularity=1, index_granularity_bytes=0, min_bytes_for_wide_part = 0, min_rows_for_wide_part = 0"
+$CLICKHOUSE_CLIENT -q "CREATE TABLE preferred_block_size_bytes (p Date, s String) ENGINE = MergeTree PARTITION BY p ORDER BY p SETTINGS index_granularity=1, index_granularity_bytes=0, min_bytes_for_wide_part = 0, min_rows_for_wide_part = 0, ratio_of_defaults_for_sparse_serialization = 1"
 $CLICKHOUSE_CLIENT -q "INSERT INTO preferred_block_size_bytes (s) SELECT '16_bytes_-_-_-_' AS s FROM system.numbers LIMIT 10, 90"
 $CLICKHOUSE_CLIENT -q "OPTIMIZE TABLE preferred_block_size_bytes"
 $CLICKHOUSE_CLIENT --preferred_block_size_bytes=26 -q "SELECT DISTINCT blockSize(), ignore(p, s) FROM preferred_block_size_bytes"
@@ -19,7 +19,7 @@ $CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS preferred_block_size_bytes"
 # PREWHERE using empty column
 
 $CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS pbs"
-$CLICKHOUSE_CLIENT -q "CREATE TABLE pbs (p Date, i UInt64, sa Array(String)) ENGINE = MergeTree PARTITION BY p ORDER BY p SETTINGS index_granularity=100, index_granularity_bytes=0, min_bytes_for_wide_part = 0, min_rows_for_wide_part = 0"
+$CLICKHOUSE_CLIENT -q "CREATE TABLE pbs (p Date, i UInt64, sa Array(String)) ENGINE = MergeTree PARTITION BY p ORDER BY p SETTINGS index_granularity=100, index_granularity_bytes=0, min_bytes_for_wide_part = 0, min_rows_for_wide_part = 0, ratio_of_defaults_for_sparse_serialization = 1"
 $CLICKHOUSE_CLIENT -q "INSERT INTO pbs (p, i, sa) SELECT toDate(i % 30) AS p, number AS i, ['a'] AS sa FROM system.numbers LIMIT 1000"
 $CLICKHOUSE_CLIENT -q "ALTER TABLE pbs ADD COLUMN s UInt8 DEFAULT 0"
 $CLICKHOUSE_CLIENT --preferred_block_size_bytes=100000 -q "SELECT count() FROM pbs PREWHERE s = 0"
@@ -30,7 +30,7 @@ $CLICKHOUSE_CLIENT -q "DROP TABLE pbs"
 # Nullable PREWHERE
 
 $CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS nullable_prewhere"
-$CLICKHOUSE_CLIENT -q "CREATE TABLE nullable_prewhere (p Date, f Nullable(UInt64), d UInt64) ENGINE = MergeTree PARTITION BY p ORDER BY p SETTINGS index_granularity=8, index_granularity_bytes=0, min_bytes_for_wide_part = 0, min_rows_for_wide_part = 0"
+$CLICKHOUSE_CLIENT -q "CREATE TABLE nullable_prewhere (p Date, f Nullable(UInt64), d UInt64) ENGINE = MergeTree PARTITION BY p ORDER BY p SETTINGS index_granularity=8, index_granularity_bytes=0, min_bytes_for_wide_part = 0, min_rows_for_wide_part = 0, ratio_of_defaults_for_sparse_serialization = 1"
 $CLICKHOUSE_CLIENT -q "INSERT INTO nullable_prewhere SELECT toDate(0) AS p, if(number % 2 = 0, CAST(number AS Nullable(UInt64)), CAST(NULL AS Nullable(UInt64))) AS f, number as d FROM system.numbers LIMIT 1001"
 $CLICKHOUSE_CLIENT -q "SELECT sum(d), sum(f), max(d) FROM nullable_prewhere PREWHERE NOT isNull(f)"
 $CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS nullable_prewhere"
diff --git a/tests/queries/0_stateless/00484_preferred_max_column_in_block_size_bytes.sql b/tests/queries/0_stateless/00484_preferred_max_column_in_block_size_bytes.sql
index 470bca70e06..be4af2221a5 100644
--- a/tests/queries/0_stateless/00484_preferred_max_column_in_block_size_bytes.sql
+++ b/tests/queries/0_stateless/00484_preferred_max_column_in_block_size_bytes.sql
@@ -1,7 +1,7 @@
 -- Tags: no-random-settings
 
 drop table if exists tab_00484;
-create table tab_00484 (date Date, x UInt64, s FixedString(128)) engine = MergeTree PARTITION BY date ORDER BY (date, x) SETTINGS min_bytes_for_wide_part = 0;
+create table tab_00484 (date Date, x UInt64, s FixedString(128)) engine = MergeTree PARTITION BY date ORDER BY (date, x) SETTINGS min_bytes_for_wide_part = 0, ratio_of_defaults_for_sparse_serialization = 1;
 insert into tab_00484 select today(), number, toFixedString('', 128) from system.numbers limit 8192;
 
 set preferred_block_size_bytes = 2000000;
@@ -17,19 +17,19 @@ set preferred_max_column_in_block_size_bytes = 4194304;
 select max(blockSize()), min(blockSize()), any(ignore(*)) from tab_00484;
 
 drop table if exists tab_00484;
-create table tab_00484 (date Date, x UInt64, s FixedString(128)) engine = MergeTree PARTITION BY date ORDER BY (date, x) SETTINGS min_bytes_for_wide_part = 0;
+create table tab_00484 (date Date, x UInt64, s FixedString(128)) engine = MergeTree PARTITION BY date ORDER BY (date, x) SETTINGS min_bytes_for_wide_part = 0, ratio_of_defaults_for_sparse_serialization = 1;
 insert into tab_00484 select today(), number, toFixedString('', 128) from system.numbers limit 47;
 set preferred_max_column_in_block_size_bytes = 1152;
 select blockSize(), * from tab_00484 where x = 1 or x > 36 format Null;
 
 drop table if exists tab_00484;
-create table tab_00484 (date Date, x UInt64, s FixedString(128)) engine = MergeTree PARTITION BY date ORDER BY (date, x) SETTINGS min_bytes_for_wide_part = 0;
+create table tab_00484 (date Date, x UInt64, s FixedString(128)) engine = MergeTree PARTITION BY date ORDER BY (date, x) SETTINGS min_bytes_for_wide_part = 0, ratio_of_defaults_for_sparse_serialization = 1;
 insert into tab_00484 select today(), number, toFixedString('', 128) from system.numbers limit 10;
 set preferred_max_column_in_block_size_bytes = 128;
 select s from tab_00484 where s == '' format Null;
 
 drop table if exists tab_00484;
-create table tab_00484 (date Date, x UInt64, s String) engine = MergeTree PARTITION BY date ORDER BY (date, x) SETTINGS min_bytes_for_wide_part = 0;
+create table tab_00484 (date Date, x UInt64, s String) engine = MergeTree PARTITION BY date ORDER BY (date, x) SETTINGS min_bytes_for_wide_part = 0, ratio_of_defaults_for_sparse_serialization = 1;
 insert into tab_00484 select today(), number, 'abc' from system.numbers limit 81920;
 set preferred_block_size_bytes = 0;
 select count(*) from tab_00484 prewhere s != 'abc' format Null;
diff --git a/tests/queries/0_stateless/00804_test_delta_codec_compression.sql b/tests/queries/0_stateless/00804_test_delta_codec_compression.sql
index 25988f6474b..01a2f53bf93 100644
--- a/tests/queries/0_stateless/00804_test_delta_codec_compression.sql
+++ b/tests/queries/0_stateless/00804_test_delta_codec_compression.sql
@@ -9,12 +9,12 @@ DROP TABLE IF EXISTS default_codec_synthetic;
 CREATE TABLE delta_codec_synthetic
 (
     id UInt64 Codec(Delta, ZSTD(3))
-) ENGINE MergeTree() ORDER BY tuple() SETTINGS min_bytes_for_wide_part = 0, compress_marks = false, compress_primary_key=false;
+) ENGINE MergeTree() ORDER BY tuple() SETTINGS min_bytes_for_wide_part = 0, compress_marks = false, compress_primary_key = false, ratio_of_defaults_for_sparse_serialization = 1;
 
 CREATE TABLE default_codec_synthetic
 (
     id UInt64 Codec(ZSTD(3))
-) ENGINE MergeTree() ORDER BY tuple() SETTINGS min_bytes_for_wide_part = 0, compress_marks = false, compress_primary_key=false;
+) ENGINE MergeTree() ORDER BY tuple() SETTINGS min_bytes_for_wide_part = 0, compress_marks = false, compress_primary_key = false, ratio_of_defaults_for_sparse_serialization = 1;
 
 INSERT INTO delta_codec_synthetic SELECT number FROM system.numbers LIMIT 5000000;
 INSERT INTO default_codec_synthetic SELECT number FROM system.numbers LIMIT 5000000;
@@ -47,12 +47,12 @@ DROP TABLE IF EXISTS default_codec_float;
 CREATE TABLE delta_codec_float
 (
     id Float64 Codec(Delta, LZ4HC)
-) ENGINE MergeTree() ORDER BY tuple() SETTINGS min_bytes_for_wide_part = 0, compress_marks = false, compress_primary_key=false;
+) ENGINE MergeTree() ORDER BY tuple() SETTINGS min_bytes_for_wide_part = 0, compress_marks = false, compress_primary_key = false, ratio_of_defaults_for_sparse_serialization = 1;
 
 CREATE TABLE default_codec_float
 (
     id Float64 Codec(LZ4HC)
-) ENGINE MergeTree() ORDER BY tuple() SETTINGS min_bytes_for_wide_part = 0, compress_marks = false, compress_primary_key=false;
+) ENGINE MergeTree() ORDER BY tuple() SETTINGS min_bytes_for_wide_part = 0, compress_marks = false, compress_primary_key = false, ratio_of_defaults_for_sparse_serialization = 1;
 
 INSERT INTO delta_codec_float SELECT number FROM numbers(1547510400, 500000) WHERE number % 3 == 0 OR number % 5 == 0 OR number % 7 == 0 OR number % 11 == 0;
 INSERT INTO default_codec_float SELECT * from delta_codec_float;
@@ -85,12 +85,12 @@ DROP TABLE IF EXISTS default_codec_string;
 CREATE TABLE delta_codec_string
 (
     id Float64 Codec(Delta, LZ4)
-) ENGINE MergeTree() ORDER BY tuple() SETTINGS min_bytes_for_wide_part = 0, compress_marks = false, compress_primary_key=false;
+) ENGINE MergeTree() ORDER BY tuple() SETTINGS min_bytes_for_wide_part = 0, compress_marks = false, compress_primary_key = false, ratio_of_defaults_for_sparse_serialization = 1;
 
 CREATE TABLE default_codec_string
 (
     id Float64 Codec(LZ4)
-) ENGINE MergeTree() ORDER BY tuple() SETTINGS min_bytes_for_wide_part = 0, compress_marks = false, compress_primary_key=false;
+) ENGINE MergeTree() ORDER BY tuple() SETTINGS min_bytes_for_wide_part = 0, compress_marks = false, compress_primary_key = false, ratio_of_defaults_for_sparse_serialization = 1;
 
 INSERT INTO delta_codec_string SELECT concat(toString(number), toString(number % 100)) FROM numbers(1547510400, 500000);
 INSERT INTO default_codec_string SELECT * from delta_codec_string;
diff --git a/tests/queries/0_stateless/00950_test_double_delta_codec.sql b/tests/queries/0_stateless/00950_test_double_delta_codec.sql
index f6199a6e4ec..58cf35b5248 100644
--- a/tests/queries/0_stateless/00950_test_double_delta_codec.sql
+++ b/tests/queries/0_stateless/00950_test_double_delta_codec.sql
@@ -24,7 +24,7 @@ CREATE TABLE codecTest (
     valueI8  Int8     CODEC(DoubleDelta),
     valueDT  DateTime CODEC(DoubleDelta),
     valueD   Date     CODEC(DoubleDelta)
-) Engine = MergeTree ORDER BY key SETTINGS min_bytes_for_wide_part = 0;
+) Engine = MergeTree ORDER BY key SETTINGS min_bytes_for_wide_part = 0, ratio_of_defaults_for_sparse_serialization = 1;
 
 
 -- checking for overflow
diff --git a/tests/queries/0_stateless/00961_checksums_in_system_parts_columns_table.sql b/tests/queries/0_stateless/00961_checksums_in_system_parts_columns_table.sql
index 43b7775e816..8df7d728560 100644
--- a/tests/queries/0_stateless/00961_checksums_in_system_parts_columns_table.sql
+++ b/tests/queries/0_stateless/00961_checksums_in_system_parts_columns_table.sql
@@ -4,7 +4,7 @@ DROP TABLE IF EXISTS test_00961;
 
 CREATE TABLE test_00961 (d Date, a String, b UInt8, x String, y Int8, z UInt32)
     ENGINE = MergeTree PARTITION BY d ORDER BY (a, b)
-    SETTINGS index_granularity = 111, min_bytes_for_wide_part = 0, compress_marks = 0, compress_primary_key = 0, index_granularity_bytes = '10Mi';
+    SETTINGS index_granularity = 111, min_bytes_for_wide_part = 0, compress_marks = 0, compress_primary_key = 0, index_granularity_bytes = '10Mi', ratio_of_defaults_for_sparse_serialization = 1;
 
 INSERT INTO test_00961 VALUES ('2000-01-01', 'Hello, world!', 123, 'xxx yyy', -123, 123456789);
 
diff --git a/tests/queries/0_stateless/01055_compact_parts_granularity.sh b/tests/queries/0_stateless/01055_compact_parts_granularity.sh
index f3da33f6ccf..3e5da1e6f90 100755
--- a/tests/queries/0_stateless/01055_compact_parts_granularity.sh
+++ b/tests/queries/0_stateless/01055_compact_parts_granularity.sh
@@ -11,7 +11,7 @@ $CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS mt_compact"
 
 $CLICKHOUSE_CLIENT -q "CREATE TABLE mt_compact(a Int, s String) ENGINE = MergeTree ORDER BY a
                         SETTINGS min_rows_for_wide_part = 1000,
-                        index_granularity = 14;"
+                        index_granularity = 14, ratio_of_defaults_for_sparse_serialization = 1;"
 
 $CLICKHOUSE_CLIENT -q "SYSTEM STOP MERGES mt_compact"
 
diff --git a/tests/queries/0_stateless/01786_explain_merge_tree.sh b/tests/queries/0_stateless/01786_explain_merge_tree.sh
index 15f8821d80d..0d4acba338a 100755
--- a/tests/queries/0_stateless/01786_explain_merge_tree.sh
+++ b/tests/queries/0_stateless/01786_explain_merge_tree.sh
@@ -10,7 +10,7 @@ CLICKHOUSE_CLIENT="$CLICKHOUSE_CLIENT --optimize_move_to_prewhere=1 --convert_qu
 $CLICKHOUSE_CLIENT -q "drop table if exists test_index"
 $CLICKHOUSE_CLIENT -q "drop table if exists idx"
 
-$CLICKHOUSE_CLIENT -q "create table test_index (x UInt32, y UInt32, z UInt32, t UInt32, index t_minmax t % 20 TYPE minmax GRANULARITY 2, index t_set t % 19 type set(4) granularity 2) engine = MergeTree order by (x, y) partition by (y, bitAnd(z, 3), intDiv(t, 15)) settings index_granularity = 2, min_bytes_for_wide_part = 0"
+$CLICKHOUSE_CLIENT -q "create table test_index (x UInt32, y UInt32, z UInt32, t UInt32, index t_minmax t % 20 TYPE minmax GRANULARITY 2, index t_set t % 19 type set(4) granularity 2) engine = MergeTree order by (x, y) partition by (y, bitAnd(z, 3), intDiv(t, 15)) settings index_granularity = 2, min_bytes_for_wide_part = 0, ratio_of_defaults_for_sparse_serialization = 1"
 $CLICKHOUSE_CLIENT -q "insert into test_index select number, number > 3 ? 3 : number, number = 1 ? 1 : 0, number from numbers(20)"
 
 $CLICKHOUSE_CLIENT -q "
@@ -35,7 +35,7 @@ $CLICKHOUSE_CLIENT -q "
     explain actions = 1 select x from test_index where x > 15 order by x desc;
     " | grep -A 100 "ReadFromMergeTree"
 
-$CLICKHOUSE_CLIENT -q "CREATE TABLE idx (x UInt32, y UInt32, z UInt32) ENGINE = MergeTree ORDER BY (x, x + y) settings min_bytes_for_wide_part = 0"
+$CLICKHOUSE_CLIENT -q "CREATE TABLE idx (x UInt32, y UInt32, z UInt32) ENGINE = MergeTree ORDER BY (x, x + y) settings min_bytes_for_wide_part = 0, ratio_of_defaults_for_sparse_serialization = 1"
 $CLICKHOUSE_CLIENT -q "insert into idx select number, number, number from numbers(10)"
 
 $CLICKHOUSE_CLIENT -q "
diff --git a/tests/queries/0_stateless/02263_lazy_mark_load.sh b/tests/queries/0_stateless/02263_lazy_mark_load.sh
index bf37556bfa6..35a1b4a44dd 100755
--- a/tests/queries/0_stateless/02263_lazy_mark_load.sh
+++ b/tests/queries/0_stateless/02263_lazy_mark_load.sh
@@ -24,7 +24,7 @@ CREATE TABLE lazy_mark_test
   n9 UInt64
 )
 ENGINE = MergeTree
-ORDER BY n0 SETTINGS min_bytes_for_wide_part = 0;
+ORDER BY n0 SETTINGS min_bytes_for_wide_part = 0, ratio_of_defaults_for_sparse_serialization = 1;
 EOF
 
 ${CLICKHOUSE_CLIENT} -q "SYSTEM STOP MERGES lazy_mark_test"
diff --git a/tests/queries/0_stateless/02293_selected_rows_and_merges.sh b/tests/queries/0_stateless/02293_selected_rows_and_merges.sh
index 9d1483f5bf7..76c562c9744 100755
--- a/tests/queries/0_stateless/02293_selected_rows_and_merges.sh
+++ b/tests/queries/0_stateless/02293_selected_rows_and_merges.sh
@@ -9,7 +9,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 
 query_id=$(${CLICKHOUSE_CLIENT} -q "select lower(hex(reverse(reinterpretAsString(generateUUIDv4()))))")
 
-${CLICKHOUSE_CLIENT} -q "create table tt (x UInt32, y UInt32) engine = MergeTree order by x"
+${CLICKHOUSE_CLIENT} -q "create table tt (x UInt32, y UInt32) engine = MergeTree order by x SETTINGS ratio_of_defaults_for_sparse_serialization = 1"
 ${CLICKHOUSE_CLIENT} -q "insert into tt select number, 0 from numbers(1e6)"
 ${CLICKHOUSE_CLIENT} -q "insert into tt select number, 1 from numbers(1e6)"
 
@@ -17,13 +17,11 @@ ${CLICKHOUSE_CLIENT} --optimize_throw_if_noop 1 -q "optimize table tt final" "--
 
 # Here SelectRows and SelectBytes should be zero, MergedRows is 2m and MergedUncompressedBytes is 16m
 ${CLICKHOUSE_CLIENT} -q "system flush logs"
-${CLICKHOUSE_CLIENT} -q "select ProfileEvents['SelectedRows'], ProfileEvents['SelecteBytes'], ProfileEvents['MergedRows'], ProfileEvents['MergedUncompressedBytes'] from system.query_log where query_id = '$query_id' and type = 'QueryFinish' and query like 'optimize%' and current_database = currentDatabase()"
+${CLICKHOUSE_CLIENT} -q "select ProfileEvents['SelectedRows'], ProfileEvents['SelectedBytes'], ProfileEvents['MergedRows'], ProfileEvents['MergedUncompressedBytes'] from system.query_log where query_id = '$query_id' and type = 'QueryFinish' and query like 'optimize%' and current_database = currentDatabase()"
 
 ${CLICKHOUSE_CLIENT} --mutations_sync 1 -q "alter table tt update y = y + 1 where 1" "--query_id=$query_id"
 ${CLICKHOUSE_CLIENT} -q "system flush logs"
 
 # Here for mutation all values are 0, cause mutation is executed async.
 # It's pretty hard to write a test with total counter.
-${CLICKHOUSE_CLIENT} -q "select ProfileEvents['SelectedRows'] > 10, ProfileEvents['SelecteBytes'], ProfileEvents['MergedRows'], ProfileEvents['MergedUncompressedBytes'] from system.query_log where query_id = '$query_id' and type = 'QueryFinish' and query like 'alter%' and current_database = currentDatabase()"
-
-
+${CLICKHOUSE_CLIENT} -q "select ProfileEvents['SelectedRows'] > 10, ProfileEvents['SelectedBytes'], ProfileEvents['MergedRows'], ProfileEvents['MergedUncompressedBytes'] from system.query_log where query_id = '$query_id' and type = 'QueryFinish' and query like 'alter%' and current_database = currentDatabase()"
diff --git a/tests/queries/0_stateless/02361_fsync_profile_events.sh b/tests/queries/0_stateless/02361_fsync_profile_events.sh
index 5b603133f6c..e150d70b896 100755
--- a/tests/queries/0_stateless/02361_fsync_profile_events.sh
+++ b/tests/queries/0_stateless/02361_fsync_profile_events.sh
@@ -12,9 +12,10 @@ $CLICKHOUSE_CLIENT -nm -q "
     create table data_fsync_pe (key Int) engine=MergeTree()
     order by key
     settings
-        min_rows_for_wide_part=2,
-        fsync_after_insert=1,
-        fsync_part_directory=1;
+        min_rows_for_wide_part = 2,
+        fsync_after_insert = 1,
+        fsync_part_directory = 1,
+        ratio_of_defaults_for_sparse_serialization = 1;
 "
 
 ret=1
diff --git a/tests/queries/0_stateless/02381_compress_marks_and_primary_key.sql b/tests/queries/0_stateless/02381_compress_marks_and_primary_key.sql
index 842e22ba87d..2fe0943745d 100644
--- a/tests/queries/0_stateless/02381_compress_marks_and_primary_key.sql
+++ b/tests/queries/0_stateless/02381_compress_marks_and_primary_key.sql
@@ -1,12 +1,12 @@
 -- Tags: no-upgrade-check, no-random-merge-tree-settings
 
 drop table if exists test_02381;
-create table test_02381(a UInt64, b UInt64) ENGINE = MergeTree order by (a, b) SETTINGS compress_marks=false, compress_primary_key=false;
+create table test_02381(a UInt64, b UInt64) ENGINE = MergeTree order by (a, b) SETTINGS compress_marks = false, compress_primary_key = false, ratio_of_defaults_for_sparse_serialization = 1;
 insert into test_02381 select number, number * 10 from system.numbers limit 1000000;
 
 drop table if exists test_02381_compress;
 create table test_02381_compress(a UInt64, b UInt64) ENGINE = MergeTree order by (a, b)
-    SETTINGS compress_marks=true, compress_primary_key=true, marks_compression_codec='ZSTD(3)', primary_key_compression_codec='ZSTD(3)', marks_compress_block_size=65536, primary_key_compress_block_size=65536;
+    SETTINGS compress_marks = true, compress_primary_key = true, marks_compression_codec = 'ZSTD(3)', primary_key_compression_codec = 'ZSTD(3)', marks_compress_block_size = 65536, primary_key_compress_block_size = 65536, ratio_of_defaults_for_sparse_serialization = 1;
 insert into test_02381_compress select number, number * 10 from system.numbers limit 1000000;
 
 select * from test_02381_compress where a = 1000 limit 1;

From e8f7a84ca6c4e00f6f9ddbf282b109f491244c4c Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@clickhouse.com>
Date: Mon, 8 May 2023 00:37:10 +0200
Subject: [PATCH 004/242] Update a few tests

---
 tests/queries/0_stateless/01375_compact_parts_codecs.sql | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tests/queries/0_stateless/01375_compact_parts_codecs.sql b/tests/queries/0_stateless/01375_compact_parts_codecs.sql
index 1dd39e67876..1c89eb09d0b 100644
--- a/tests/queries/0_stateless/01375_compact_parts_codecs.sql
+++ b/tests/queries/0_stateless/01375_compact_parts_codecs.sql
@@ -4,7 +4,7 @@ DROP TABLE IF EXISTS codecs;
 
 CREATE TABLE codecs (id UInt32, val UInt32, s String)
     ENGINE = MergeTree ORDER BY id
-    SETTINGS min_rows_for_wide_part = 10000;
+    SETTINGS min_rows_for_wide_part = 10000, ratio_of_defaults_for_sparse_serialization = 1;
 INSERT INTO codecs SELECT number, number, toString(number) FROM numbers(1000);
 SELECT sum(data_compressed_bytes), sum(data_uncompressed_bytes)
     FROM system.parts
@@ -21,7 +21,7 @@ DROP TABLE codecs;
 
 CREATE TABLE codecs (id UInt32 CODEC(NONE), val UInt32 CODEC(NONE), s String CODEC(NONE))
     ENGINE = MergeTree ORDER BY id
-    SETTINGS min_rows_for_wide_part = 10000;
+    SETTINGS min_rows_for_wide_part = 10000, ratio_of_defaults_for_sparse_serialization = 1;
 INSERT INTO codecs SELECT number, number, toString(number) FROM numbers(1000);
 SELECT sum(data_compressed_bytes), sum(data_uncompressed_bytes)
     FROM system.parts
@@ -38,7 +38,7 @@ DROP TABLE codecs;
 
 CREATE TABLE codecs (id UInt32, val UInt32 CODEC(Delta, ZSTD), s String CODEC(ZSTD))
     ENGINE = MergeTree ORDER BY id
-    SETTINGS min_rows_for_wide_part = 10000;
+    SETTINGS min_rows_for_wide_part = 10000, ratio_of_defaults_for_sparse_serialization = 1;
 INSERT INTO codecs SELECT number, number, toString(number) FROM numbers(1000);
 SELECT sum(data_compressed_bytes), sum(data_uncompressed_bytes)
     FROM system.parts

From a25de5fb4186fbe103f916b07aa8bd89975048b9 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@clickhouse.com>
Date: Mon, 8 May 2023 00:55:44 +0200
Subject: [PATCH 005/242] Update a test

---
 .../02530_dictionaries_update_field.reference | 24 +++++++++----------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/tests/queries/0_stateless/02530_dictionaries_update_field.reference b/tests/queries/0_stateless/02530_dictionaries_update_field.reference
index 40f2c0ee400..88c910e0313 100644
--- a/tests/queries/0_stateless/02530_dictionaries_update_field.reference
+++ b/tests/queries/0_stateless/02530_dictionaries_update_field.reference
@@ -4,13 +4,13 @@ flat
 SELECT key, value FROM dict_flat ORDER BY key ASC;
 1	First
 INSERT INTO table_for_update_field_dictionary VALUES (2, 'Second', now());
-SELECT sleepEachRow(1) FROM numbers(10) FORMAT Null;
+SELECT sleepEachRow(1) FROM numbers(10) SETTINGS function_sleep_max_microseconds_per_block = 10000000 FORMAT Null;
 SELECT key, value FROM dict_flat ORDER BY key ASC;
 1	First
 2	Second
 INSERT INTO table_for_update_field_dictionary VALUES (2, 'SecondUpdated', now());
 INSERT INTO table_for_update_field_dictionary VALUES (3, 'Third', now());
-SELECT sleepEachRow(1) FROM numbers(10) FORMAT Null;
+SELECT sleepEachRow(1) FROM numbers(10) SETTINGS function_sleep_max_microseconds_per_block = 10000000 FORMAT Null;
 SELECT key, value FROM dict_flat ORDER BY key ASC;
 1	First
 2	SecondUpdated
@@ -21,13 +21,13 @@ flat/custom
 SELECT key, value FROM dict_flat_custom ORDER BY key ASC;
 1	First
 INSERT INTO table_for_update_field_dictionary VALUES (2, 'Second', now());
-SELECT sleepEachRow(1) FROM numbers(10) FORMAT Null;
+SELECT sleepEachRow(1) FROM numbers(10) SETTINGS function_sleep_max_microseconds_per_block = 10000000 FORMAT Null;
 SELECT key, value FROM dict_flat_custom ORDER BY key ASC;
 1	First
 2	Second
 INSERT INTO table_for_update_field_dictionary VALUES (2, 'SecondUpdated', now());
 INSERT INTO table_for_update_field_dictionary VALUES (3, 'Third', now());
-SELECT sleepEachRow(1) FROM numbers(10) FORMAT Null;
+SELECT sleepEachRow(1) FROM numbers(10) SETTINGS function_sleep_max_microseconds_per_block = 10000000 FORMAT Null;
 SELECT key, value FROM dict_flat_custom ORDER BY key ASC;
 1	First
 2	SecondUpdated
@@ -38,13 +38,13 @@ hashed
 SELECT key, value FROM dict_hashed ORDER BY key ASC;
 1	First
 INSERT INTO table_for_update_field_dictionary VALUES (2, 'Second', now());
-SELECT sleepEachRow(1) FROM numbers(10) FORMAT Null;
+SELECT sleepEachRow(1) FROM numbers(10) SETTINGS function_sleep_max_microseconds_per_block = 10000000 FORMAT Null;
 SELECT key, value FROM dict_hashed ORDER BY key ASC;
 1	First
 2	Second
 INSERT INTO table_for_update_field_dictionary VALUES (2, 'SecondUpdated', now());
 INSERT INTO table_for_update_field_dictionary VALUES (3, 'Third', now());
-SELECT sleepEachRow(1) FROM numbers(10) FORMAT Null;
+SELECT sleepEachRow(1) FROM numbers(10) SETTINGS function_sleep_max_microseconds_per_block = 10000000 FORMAT Null;
 SELECT key, value FROM dict_hashed ORDER BY key ASC;
 1	First
 2	SecondUpdated
@@ -55,13 +55,13 @@ hashed/custom
 SELECT key, value FROM dict_hashed_custom ORDER BY key ASC;
 1	First
 INSERT INTO table_for_update_field_dictionary VALUES (2, 'Second', now());
-SELECT sleepEachRow(1) FROM numbers(10) FORMAT Null;
+SELECT sleepEachRow(1) FROM numbers(10) SETTINGS function_sleep_max_microseconds_per_block = 10000000 FORMAT Null;
 SELECT key, value FROM dict_hashed_custom ORDER BY key ASC;
 1	First
 2	Second
 INSERT INTO table_for_update_field_dictionary VALUES (2, 'SecondUpdated', now());
 INSERT INTO table_for_update_field_dictionary VALUES (3, 'Third', now());
-SELECT sleepEachRow(1) FROM numbers(10) FORMAT Null;
+SELECT sleepEachRow(1) FROM numbers(10) SETTINGS function_sleep_max_microseconds_per_block = 10000000 FORMAT Null;
 SELECT key, value FROM dict_hashed_custom ORDER BY key ASC;
 1	First
 2	SecondUpdated
@@ -72,13 +72,13 @@ complex_key_hashed
 SELECT key, value FROM dict_complex_key_hashed ORDER BY key ASC;
 1	First
 INSERT INTO table_for_update_field_dictionary VALUES (2, 'Second', now());
-SELECT sleepEachRow(1) FROM numbers(10) FORMAT Null;
+SELECT sleepEachRow(1) FROM numbers(10) SETTINGS function_sleep_max_microseconds_per_block = 10000000 FORMAT Null;
 SELECT key, value FROM dict_complex_key_hashed ORDER BY key ASC;
 1	First
 2	Second
 INSERT INTO table_for_update_field_dictionary VALUES (2, 'SecondUpdated', now());
 INSERT INTO table_for_update_field_dictionary VALUES (3, 'Third', now());
-SELECT sleepEachRow(1) FROM numbers(10) FORMAT Null;
+SELECT sleepEachRow(1) FROM numbers(10) SETTINGS function_sleep_max_microseconds_per_block = 10000000 FORMAT Null;
 SELECT key, value FROM dict_complex_key_hashed ORDER BY key ASC;
 1	First
 2	SecondUpdated
@@ -89,13 +89,13 @@ complex_key_hashed/custom
 SELECT key, value FROM dict_complex_key_hashed_custom ORDER BY key ASC;
 1	First
 INSERT INTO table_for_update_field_dictionary VALUES (2, 'Second', now());
-SELECT sleepEachRow(1) FROM numbers(10) FORMAT Null;
+SELECT sleepEachRow(1) FROM numbers(10) SETTINGS function_sleep_max_microseconds_per_block = 10000000 FORMAT Null;
 SELECT key, value FROM dict_complex_key_hashed_custom ORDER BY key ASC;
 1	First
 2	Second
 INSERT INTO table_for_update_field_dictionary VALUES (2, 'SecondUpdated', now());
 INSERT INTO table_for_update_field_dictionary VALUES (3, 'Third', now());
-SELECT sleepEachRow(1) FROM numbers(10) FORMAT Null;
+SELECT sleepEachRow(1) FROM numbers(10) SETTINGS function_sleep_max_microseconds_per_block = 10000000 FORMAT Null;
 SELECT key, value FROM dict_complex_key_hashed_custom ORDER BY key ASC;
 1	First
 2	SecondUpdated

From 6b0bd698d36014a5eac052857bac2185a1f45f41 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@clickhouse.com>
Date: Thu, 11 May 2023 04:17:53 +0200
Subject: [PATCH 006/242] Fix mistake

---
 .../02530_dictionaries_update_field.reference | 24 +++++++++----------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/tests/queries/0_stateless/02530_dictionaries_update_field.reference b/tests/queries/0_stateless/02530_dictionaries_update_field.reference
index 88c910e0313..40f2c0ee400 100644
--- a/tests/queries/0_stateless/02530_dictionaries_update_field.reference
+++ b/tests/queries/0_stateless/02530_dictionaries_update_field.reference
@@ -4,13 +4,13 @@ flat
 SELECT key, value FROM dict_flat ORDER BY key ASC;
 1	First
 INSERT INTO table_for_update_field_dictionary VALUES (2, 'Second', now());
-SELECT sleepEachRow(1) FROM numbers(10) SETTINGS function_sleep_max_microseconds_per_block = 10000000 FORMAT Null;
+SELECT sleepEachRow(1) FROM numbers(10) FORMAT Null;
 SELECT key, value FROM dict_flat ORDER BY key ASC;
 1	First
 2	Second
 INSERT INTO table_for_update_field_dictionary VALUES (2, 'SecondUpdated', now());
 INSERT INTO table_for_update_field_dictionary VALUES (3, 'Third', now());
-SELECT sleepEachRow(1) FROM numbers(10) SETTINGS function_sleep_max_microseconds_per_block = 10000000 FORMAT Null;
+SELECT sleepEachRow(1) FROM numbers(10) FORMAT Null;
 SELECT key, value FROM dict_flat ORDER BY key ASC;
 1	First
 2	SecondUpdated
@@ -21,13 +21,13 @@ flat/custom
 SELECT key, value FROM dict_flat_custom ORDER BY key ASC;
 1	First
 INSERT INTO table_for_update_field_dictionary VALUES (2, 'Second', now());
-SELECT sleepEachRow(1) FROM numbers(10) SETTINGS function_sleep_max_microseconds_per_block = 10000000 FORMAT Null;
+SELECT sleepEachRow(1) FROM numbers(10) FORMAT Null;
 SELECT key, value FROM dict_flat_custom ORDER BY key ASC;
 1	First
 2	Second
 INSERT INTO table_for_update_field_dictionary VALUES (2, 'SecondUpdated', now());
 INSERT INTO table_for_update_field_dictionary VALUES (3, 'Third', now());
-SELECT sleepEachRow(1) FROM numbers(10) SETTINGS function_sleep_max_microseconds_per_block = 10000000 FORMAT Null;
+SELECT sleepEachRow(1) FROM numbers(10) FORMAT Null;
 SELECT key, value FROM dict_flat_custom ORDER BY key ASC;
 1	First
 2	SecondUpdated
@@ -38,13 +38,13 @@ hashed
 SELECT key, value FROM dict_hashed ORDER BY key ASC;
 1	First
 INSERT INTO table_for_update_field_dictionary VALUES (2, 'Second', now());
-SELECT sleepEachRow(1) FROM numbers(10) SETTINGS function_sleep_max_microseconds_per_block = 10000000 FORMAT Null;
+SELECT sleepEachRow(1) FROM numbers(10) FORMAT Null;
 SELECT key, value FROM dict_hashed ORDER BY key ASC;
 1	First
 2	Second
 INSERT INTO table_for_update_field_dictionary VALUES (2, 'SecondUpdated', now());
 INSERT INTO table_for_update_field_dictionary VALUES (3, 'Third', now());
-SELECT sleepEachRow(1) FROM numbers(10) SETTINGS function_sleep_max_microseconds_per_block = 10000000 FORMAT Null;
+SELECT sleepEachRow(1) FROM numbers(10) FORMAT Null;
 SELECT key, value FROM dict_hashed ORDER BY key ASC;
 1	First
 2	SecondUpdated
@@ -55,13 +55,13 @@ hashed/custom
 SELECT key, value FROM dict_hashed_custom ORDER BY key ASC;
 1	First
 INSERT INTO table_for_update_field_dictionary VALUES (2, 'Second', now());
-SELECT sleepEachRow(1) FROM numbers(10) SETTINGS function_sleep_max_microseconds_per_block = 10000000 FORMAT Null;
+SELECT sleepEachRow(1) FROM numbers(10) FORMAT Null;
 SELECT key, value FROM dict_hashed_custom ORDER BY key ASC;
 1	First
 2	Second
 INSERT INTO table_for_update_field_dictionary VALUES (2, 'SecondUpdated', now());
 INSERT INTO table_for_update_field_dictionary VALUES (3, 'Third', now());
-SELECT sleepEachRow(1) FROM numbers(10) SETTINGS function_sleep_max_microseconds_per_block = 10000000 FORMAT Null;
+SELECT sleepEachRow(1) FROM numbers(10) FORMAT Null;
 SELECT key, value FROM dict_hashed_custom ORDER BY key ASC;
 1	First
 2	SecondUpdated
@@ -72,13 +72,13 @@ complex_key_hashed
 SELECT key, value FROM dict_complex_key_hashed ORDER BY key ASC;
 1	First
 INSERT INTO table_for_update_field_dictionary VALUES (2, 'Second', now());
-SELECT sleepEachRow(1) FROM numbers(10) SETTINGS function_sleep_max_microseconds_per_block = 10000000 FORMAT Null;
+SELECT sleepEachRow(1) FROM numbers(10) FORMAT Null;
 SELECT key, value FROM dict_complex_key_hashed ORDER BY key ASC;
 1	First
 2	Second
 INSERT INTO table_for_update_field_dictionary VALUES (2, 'SecondUpdated', now());
 INSERT INTO table_for_update_field_dictionary VALUES (3, 'Third', now());
-SELECT sleepEachRow(1) FROM numbers(10) SETTINGS function_sleep_max_microseconds_per_block = 10000000 FORMAT Null;
+SELECT sleepEachRow(1) FROM numbers(10) FORMAT Null;
 SELECT key, value FROM dict_complex_key_hashed ORDER BY key ASC;
 1	First
 2	SecondUpdated
@@ -89,13 +89,13 @@ complex_key_hashed/custom
 SELECT key, value FROM dict_complex_key_hashed_custom ORDER BY key ASC;
 1	First
 INSERT INTO table_for_update_field_dictionary VALUES (2, 'Second', now());
-SELECT sleepEachRow(1) FROM numbers(10) SETTINGS function_sleep_max_microseconds_per_block = 10000000 FORMAT Null;
+SELECT sleepEachRow(1) FROM numbers(10) FORMAT Null;
 SELECT key, value FROM dict_complex_key_hashed_custom ORDER BY key ASC;
 1	First
 2	Second
 INSERT INTO table_for_update_field_dictionary VALUES (2, 'SecondUpdated', now());
 INSERT INTO table_for_update_field_dictionary VALUES (3, 'Third', now());
-SELECT sleepEachRow(1) FROM numbers(10) SETTINGS function_sleep_max_microseconds_per_block = 10000000 FORMAT Null;
+SELECT sleepEachRow(1) FROM numbers(10) FORMAT Null;
 SELECT key, value FROM dict_complex_key_hashed_custom ORDER BY key ASC;
 1	First
 2	SecondUpdated

From 65d28a959ff5b21199c2b20d8dcb7c7b399f314d Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@clickhouse.com>
Date: Thu, 11 May 2023 04:26:29 +0200
Subject: [PATCH 007/242] Update integration tests (1/2)

---
 .../configs/config.d/storage_conf.xml                         | 1 +
 .../test_merge_tree_hdfs/configs/config.d/storage_conf.xml    | 1 +
 .../test_merge_tree_s3_failover/configs/config.xml            | 4 ++++
 .../test_s3_zero_copy_replication/configs/config.d/s3.xml     | 1 +
 4 files changed, 7 insertions(+)

diff --git a/tests/integration/test_azure_blob_storage_zero_copy_replication/configs/config.d/storage_conf.xml b/tests/integration/test_azure_blob_storage_zero_copy_replication/configs/config.d/storage_conf.xml
index cb87abcc693..d69fe96a3e2 100644
--- a/tests/integration/test_azure_blob_storage_zero_copy_replication/configs/config.d/storage_conf.xml
+++ b/tests/integration/test_azure_blob_storage_zero_copy_replication/configs/config.d/storage_conf.xml
@@ -45,5 +45,6 @@
 
     <merge_tree>
         <allow_remote_fs_zero_copy_replication>true</allow_remote_fs_zero_copy_replication>
+        <ratio_of_defaults_for_sparse_serialization>1.0</ratio_of_defaults_for_sparse_serialization>
     </merge_tree>
 </clickhouse>
diff --git a/tests/integration/test_merge_tree_hdfs/configs/config.d/storage_conf.xml b/tests/integration/test_merge_tree_hdfs/configs/config.d/storage_conf.xml
index 890c396ed95..7d59081486b 100644
--- a/tests/integration/test_merge_tree_hdfs/configs/config.d/storage_conf.xml
+++ b/tests/integration/test_merge_tree_hdfs/configs/config.d/storage_conf.xml
@@ -28,5 +28,6 @@
 
     <merge_tree>
         <min_bytes_for_wide_part>0</min_bytes_for_wide_part>
+        <ratio_of_defaults_for_sparse_serialization>1.0</ratio_of_defaults_for_sparse_serialization>
     </merge_tree>
 </clickhouse>
diff --git a/tests/integration/test_merge_tree_s3_failover/configs/config.xml b/tests/integration/test_merge_tree_s3_failover/configs/config.xml
index feb537ebbce..743d75d9a21 100644
--- a/tests/integration/test_merge_tree_s3_failover/configs/config.xml
+++ b/tests/integration/test_merge_tree_s3_failover/configs/config.xml
@@ -15,4 +15,8 @@
     <max_concurrent_queries>500</max_concurrent_queries>
     <path>./clickhouse/</path>
     <users_config>users.xml</users_config>
+
+    <merge_tree>
+        <ratio_of_defaults_for_sparse_serialization>1.0</ratio_of_defaults_for_sparse_serialization>
+    </merge_tree>
 </clickhouse>
diff --git a/tests/integration/test_s3_zero_copy_replication/configs/config.d/s3.xml b/tests/integration/test_s3_zero_copy_replication/configs/config.d/s3.xml
index f7d9efc2cae..55c35999703 100644
--- a/tests/integration/test_s3_zero_copy_replication/configs/config.d/s3.xml
+++ b/tests/integration/test_s3_zero_copy_replication/configs/config.d/s3.xml
@@ -70,6 +70,7 @@
         <min_bytes_for_wide_part>1024</min_bytes_for_wide_part>
         <old_parts_lifetime>1</old_parts_lifetime>
         <allow_remote_fs_zero_copy_replication>true</allow_remote_fs_zero_copy_replication>
+        <ratio_of_defaults_for_sparse_serialization>1.0</ratio_of_defaults_for_sparse_serialization>
     </merge_tree>
 
     <remote_servers>

From 4a7761c16210c7e2eccc0b26e172ec8dc7e6c183 Mon Sep 17 00:00:00 2001
From: flynn <fenglv15@mails.ucas.ac.cn>
Date: Sat, 10 Jun 2023 08:26:32 +0000
Subject: [PATCH 008/242] Add column is_obsolete for system.settings table and
 related system tables

---
 src/Interpreters/Context.cpp                              | 6 ++++--
 src/Storages/System/StorageSystemMergeTreeSettings.cpp    | 2 ++
 src/Storages/System/StorageSystemServerSettings.cpp       | 2 ++
 src/Storages/System/StorageSystemSettings.cpp             | 2 ++
 tests/queries/0_stateless/01945_show_debug_warning.expect | 2 +-
 tests/queries/0_stateless/01945_system_warnings.reference | 2 +-
 tests/queries/0_stateless/01945_system_warnings.sh        | 4 ++--
 7 files changed, 14 insertions(+), 6 deletions(-)

diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp
index 995e78d8f0b..a12117b7677 100644
--- a/src/Interpreters/Context.cpp
+++ b/src/Interpreters/Context.cpp
@@ -781,8 +781,10 @@ Strings Context::getWarnings() const
     {
         if (setting.isValueChanged() && setting.isObsolete())
         {
-            common_warnings.emplace_back("Some obsolete setting is changed. "
-                                         "Check 'select * from system.settings where changed' and read the changelog.");
+            common_warnings.emplace_back(
+                "Obsolete setting `" + setting.getName()
+                + "` is changed. "
+                  "Check 'select * from system.settings where changed' and read the changelog.");
             break;
         }
     }
diff --git a/src/Storages/System/StorageSystemMergeTreeSettings.cpp b/src/Storages/System/StorageSystemMergeTreeSettings.cpp
index 6de3fb800f4..0ddd4546208 100644
--- a/src/Storages/System/StorageSystemMergeTreeSettings.cpp
+++ b/src/Storages/System/StorageSystemMergeTreeSettings.cpp
@@ -21,6 +21,7 @@ NamesAndTypesList SystemMergeTreeSettings<replicated>::getNamesAndTypes()
         {"max",         std::make_shared<DataTypeNullable>(std::make_shared<DataTypeString>())},
         {"readonly",    std::make_shared<DataTypeUInt8>()},
         {"type",        std::make_shared<DataTypeString>()},
+        {"is_obsolete", std::make_shared<DataTypeUInt8>()},
     };
 }
 
@@ -52,6 +53,7 @@ void SystemMergeTreeSettings<replicated>::fillData(MutableColumns & res_columns,
         res_columns[5]->insert(max);
         res_columns[6]->insert(writability == SettingConstraintWritability::CONST);
         res_columns[7]->insert(setting.getTypeName());
+        res_columns[8]->insert(setting.isObsolete());
     }
 }
 
diff --git a/src/Storages/System/StorageSystemServerSettings.cpp b/src/Storages/System/StorageSystemServerSettings.cpp
index ad52c6896ac..290b575465c 100644
--- a/src/Storages/System/StorageSystemServerSettings.cpp
+++ b/src/Storages/System/StorageSystemServerSettings.cpp
@@ -15,6 +15,7 @@ NamesAndTypesList StorageSystemServerSettings::getNamesAndTypes()
         {"changed", std::make_shared<DataTypeUInt8>()},
         {"description", std::make_shared<DataTypeString>()},
         {"type", std::make_shared<DataTypeString>()},
+        {"is_obsolete", std::make_shared<DataTypeUInt8>()},
     };
 }
 
@@ -33,6 +34,7 @@ void StorageSystemServerSettings::fillData(MutableColumns & res_columns, Context
         res_columns[3]->insert(setting.isValueChanged());
         res_columns[4]->insert(setting.getDescription());
         res_columns[5]->insert(setting.getTypeName());
+        res_columns[6]->insert(setting.isObsolete());
     }
 }
 
diff --git a/src/Storages/System/StorageSystemSettings.cpp b/src/Storages/System/StorageSystemSettings.cpp
index c54f7eef25f..dcb54eac0a0 100644
--- a/src/Storages/System/StorageSystemSettings.cpp
+++ b/src/Storages/System/StorageSystemSettings.cpp
@@ -21,6 +21,7 @@ NamesAndTypesList StorageSystemSettings::getNamesAndTypes()
         {"type", std::make_shared<DataTypeString>()},
         {"default", std::make_shared<DataTypeString>()},
         {"alias_for", std::make_shared<DataTypeString>()},
+        {"is_obsolete", std::make_shared<DataTypeUInt8>()},
     };
 }
 
@@ -51,6 +52,7 @@ void StorageSystemSettings::fillData(MutableColumns & res_columns, ContextPtr co
         res_columns[6]->insert(writability == SettingConstraintWritability::CONST);
         res_columns[7]->insert(setting.getTypeName());
         res_columns[8]->insert(setting.getDefaultValueString());
+        res_columns[10]->insert(setting.isObsolete());
     };
 
     const auto & settings_to_aliases = Settings::Traits::settingsToAliases();
diff --git a/tests/queries/0_stateless/01945_show_debug_warning.expect b/tests/queries/0_stateless/01945_show_debug_warning.expect
index 4e6dd3e1b0f..3d5b1ca99a5 100755
--- a/tests/queries/0_stateless/01945_show_debug_warning.expect
+++ b/tests/queries/0_stateless/01945_show_debug_warning.expect
@@ -55,7 +55,7 @@ expect eof
 
 spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion --max_memory_usage_for_all_queries=123 --history_file=$history_file"
 expect "Warnings:"
-expect " * Some obsolete setting is changed."
+expect " * Obsolete setting `max_memory_usage_for_all_queries` is changed."
 expect ":) "
 send -- "q\r"
 expect eof
diff --git a/tests/queries/0_stateless/01945_system_warnings.reference b/tests/queries/0_stateless/01945_system_warnings.reference
index 296a03447db..d6ae567289c 100644
--- a/tests/queries/0_stateless/01945_system_warnings.reference
+++ b/tests/queries/0_stateless/01945_system_warnings.reference
@@ -1,5 +1,5 @@
 Server was built in debug mode. It will work slowly.
 0
-Some obsolete setting is changed. Check \'select * from system.settings where changed\' and read the changelog.
+Obsolete setting `multiple_joins_rewriter_version` is changed. Check \'select * from system.settings where changed\' and read the changelog.
 1
 1
diff --git a/tests/queries/0_stateless/01945_system_warnings.sh b/tests/queries/0_stateless/01945_system_warnings.sh
index bf11cee2911..112baab614e 100755
--- a/tests/queries/0_stateless/01945_system_warnings.sh
+++ b/tests/queries/0_stateless/01945_system_warnings.sh
@@ -14,8 +14,8 @@ else
     echo "Server was built in debug mode. It will work slowly."
 fi
 
-${CLICKHOUSE_CLIENT} -q "SELECT count() FROM system.warnings WHERE message LIKE '%obsolete setting%'"
-${CLICKHOUSE_CLIENT} --multiple_joins_rewriter_version=42 -q "SELECT message FROM system.warnings WHERE message LIKE '%obsolete setting%'"
+${CLICKHOUSE_CLIENT} -q "SELECT count() FROM system.warnings WHERE message LIKE '%Obsolete setting%'"
+${CLICKHOUSE_CLIENT} --multiple_joins_rewriter_version=42 -q "SELECT message FROM system.warnings WHERE message LIKE '%Obsolete setting%'"
 
 # Avoid duplicated warnings
 ${CLICKHOUSE_CLIENT} -q "SELECT count() = countDistinct(message) FROM system.warnings"

From 820673a5cf3f3f1c17b781496b3ab56f72f72c08 Mon Sep 17 00:00:00 2001
From: flynn <fenglv15@mails.ucas.ac.cn>
Date: Sat, 10 Jun 2023 10:16:53 +0000
Subject: [PATCH 009/242] update test

---
 .../queries/0_stateless/01221_system_settings.reference  | 4 ++--
 .../0_stateless/02117_show_create_table_system.reference | 9 ++++++---
 2 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/tests/queries/0_stateless/01221_system_settings.reference b/tests/queries/0_stateless/01221_system_settings.reference
index 399b3778b66..e9c2f3fec32 100644
--- a/tests/queries/0_stateless/01221_system_settings.reference
+++ b/tests/queries/0_stateless/01221_system_settings.reference
@@ -1,4 +1,4 @@
-send_timeout	300	0	Timeout for sending data to network, in seconds. If client needs to sent some data, but it did not able to send any bytes in this interval, exception is thrown. If you set this setting on client, the \'receive_timeout\' for the socket will be also set on the corresponding connection end on the server.	\N	\N	0	Seconds	300	
-storage_policy	default	0	Name of storage disk policy	\N	\N	0	String
+send_timeout	300	0	Timeout for sending data to network, in seconds. If client needs to sent some data, but it did not able to send any bytes in this interval, exception is thrown. If you set this setting on client, the \'receive_timeout\' for the socket will be also set on the corresponding connection end on the server.	\N	\N	0	Seconds	300		0
+storage_policy	default	0	Name of storage disk policy	\N	\N	0	String	0
 1
 1
diff --git a/tests/queries/0_stateless/02117_show_create_table_system.reference b/tests/queries/0_stateless/02117_show_create_table_system.reference
index e864ba85018..38d00c15725 100644
--- a/tests/queries/0_stateless/02117_show_create_table_system.reference
+++ b/tests/queries/0_stateless/02117_show_create_table_system.reference
@@ -346,7 +346,8 @@ CREATE TABLE system.merge_tree_settings
     `min` Nullable(String),
     `max` Nullable(String),
     `readonly` UInt8,
-    `type` String
+    `type` String,
+    `is_obsolete` UInt8
 )
 ENGINE = SystemMergeTreeSettings
 COMMENT 'SYSTEM TABLE is built on the fly.'
@@ -918,7 +919,8 @@ CREATE TABLE system.replicated_merge_tree_settings
     `min` Nullable(String),
     `max` Nullable(String),
     `readonly` UInt8,
-    `type` String
+    `type` String,
+    `is_obsolete` UInt8
 )
 ENGINE = SystemReplicatedMergeTreeSettings
 COMMENT 'SYSTEM TABLE is built on the fly.'
@@ -993,7 +995,8 @@ CREATE TABLE system.settings
     `readonly` UInt8,
     `type` String,
     `default` String,
-    `alias_for` String
+    `alias_for` String,
+    `is_obsolete` UInt8
 )
 ENGINE = SystemSettings
 COMMENT 'SYSTEM TABLE is built on the fly.'

From e9763caa0eb7078cd28e3765d0da1e0a9b4a204b Mon Sep 17 00:00:00 2001
From: flynn <fenglv15@mails.ucas.ac.cn>
Date: Mon, 12 Jun 2023 14:21:58 +0000
Subject: [PATCH 010/242] fix

---
 src/Interpreters/Context.cpp                   | 18 +++++++++++++-----
 .../01945_system_warnings.reference            |  2 +-
 2 files changed, 14 insertions(+), 6 deletions(-)

diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp
index a12117b7677..823c3d678df 100644
--- a/src/Interpreters/Context.cpp
+++ b/src/Interpreters/Context.cpp
@@ -777,17 +777,25 @@ Strings Context::getWarnings() const
         auto lock = getLock();
         common_warnings = shared->warnings;
     }
+    String res = "Obsolete settings [";
+    size_t obsolete_settings_count = 0;
     for (const auto & setting : settings)
     {
         if (setting.isValueChanged() && setting.isObsolete())
         {
-            common_warnings.emplace_back(
-                "Obsolete setting `" + setting.getName()
-                + "` is changed. "
-                  "Check 'select * from system.settings where changed' and read the changelog.");
-            break;
+            res += (obsolete_settings_count ? ", `" : "`") + setting.getName() + "`";
+            ++obsolete_settings_count;
         }
     }
+
+    if (obsolete_settings_count)
+    {
+        res = res + "]" + (obsolete_settings_count == 1 ? " is" : " are")
+            + " changed. "
+              "Please check 'select * from system.settings where changed and is_obsolete' and read the changelog.";
+        common_warnings.emplace_back(res);
+    }
+
     return common_warnings;
 }
 
diff --git a/tests/queries/0_stateless/01945_system_warnings.reference b/tests/queries/0_stateless/01945_system_warnings.reference
index d6ae567289c..3e7edacd275 100644
--- a/tests/queries/0_stateless/01945_system_warnings.reference
+++ b/tests/queries/0_stateless/01945_system_warnings.reference
@@ -1,5 +1,5 @@
 Server was built in debug mode. It will work slowly.
 0
-Obsolete setting `multiple_joins_rewriter_version` is changed. Check \'select * from system.settings where changed\' and read the changelog.
+Obsolete settings [`multiple_joins_rewriter_version`] is changed. Check \'select * from system.settings where changed\' and read the changelog.
 1
 1

From 18f4f1a5238c64f3b45e1d6781ef2c7104ab842d Mon Sep 17 00:00:00 2001
From: flynn <fenglv15@mails.ucas.ac.cn>
Date: Mon, 12 Jun 2023 15:11:19 +0000
Subject: [PATCH 011/242] update test

---
 tests/queries/0_stateless/01945_show_debug_warning.expect | 2 +-
 tests/queries/0_stateless/01945_system_warnings.reference | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/queries/0_stateless/01945_show_debug_warning.expect b/tests/queries/0_stateless/01945_show_debug_warning.expect
index 3d5b1ca99a5..f0c97acb1f5 100755
--- a/tests/queries/0_stateless/01945_show_debug_warning.expect
+++ b/tests/queries/0_stateless/01945_show_debug_warning.expect
@@ -55,7 +55,7 @@ expect eof
 
 spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion --max_memory_usage_for_all_queries=123 --history_file=$history_file"
 expect "Warnings:"
-expect " * Obsolete setting `max_memory_usage_for_all_queries` is changed."
+expect " * Obsolete settings [`max_memory_usage_for_all_queries`] is changed."
 expect ":) "
 send -- "q\r"
 expect eof
diff --git a/tests/queries/0_stateless/01945_system_warnings.reference b/tests/queries/0_stateless/01945_system_warnings.reference
index 3e7edacd275..0c05d5d7049 100644
--- a/tests/queries/0_stateless/01945_system_warnings.reference
+++ b/tests/queries/0_stateless/01945_system_warnings.reference
@@ -1,5 +1,5 @@
 Server was built in debug mode. It will work slowly.
 0
-Obsolete settings [`multiple_joins_rewriter_version`] is changed. Check \'select * from system.settings where changed\' and read the changelog.
+Obsolete settings [`multiple_joins_rewriter_version`] is changed. Please check \'select * from system.settings where changed and is_obsolete\' and read the changelog.
 1
 1

From b76ba13250ad5b0abe728875be0e41667450cd5f Mon Sep 17 00:00:00 2001
From: flynn <fenglv15@mails.ucas.ac.cn>
Date: Mon, 12 Jun 2023 15:41:46 +0000
Subject: [PATCH 012/242] fix

---
 tests/queries/0_stateless/01945_show_debug_warning.expect | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/queries/0_stateless/01945_show_debug_warning.expect b/tests/queries/0_stateless/01945_show_debug_warning.expect
index f0c97acb1f5..617e54a375e 100755
--- a/tests/queries/0_stateless/01945_show_debug_warning.expect
+++ b/tests/queries/0_stateless/01945_show_debug_warning.expect
@@ -55,7 +55,7 @@ expect eof
 
 spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion --max_memory_usage_for_all_queries=123 --history_file=$history_file"
 expect "Warnings:"
-expect " * Obsolete settings [`max_memory_usage_for_all_queries`] is changed."
+expect " * Obsolete settings [`max_memory_usage_for_all_queries`] is changed. Please check \'select * from system.settings where changed and is_obsolete\' and read the changelog."
 expect ":) "
 send -- "q\r"
 expect eof

From 2148f29a40f44f387b2cfbd9d3496bf9bc0b7e8d Mon Sep 17 00:00:00 2001
From: pufit <pufit@clickhouse.com>
Date: Thu, 8 Jun 2023 20:29:27 -0400
Subject: [PATCH 013/242] More accurate DNS resolve for the keeper connection

---
 src/Common/ZooKeeper/ZooKeeper.cpp | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/src/Common/ZooKeeper/ZooKeeper.cpp b/src/Common/ZooKeeper/ZooKeeper.cpp
index a587ad6caf4..e078470476a 100644
--- a/src/Common/ZooKeeper/ZooKeeper.cpp
+++ b/src/Common/ZooKeeper/ZooKeeper.cpp
@@ -13,6 +13,7 @@
 #include <base/sort.h>
 #include <base/getFQDNOrHostName.h>
 #include "Common/ZooKeeper/IKeeper.h"
+#include <Common/DNSResolver.h>
 #include <Common/StringUtils/StringUtils.h>
 #include <Common/Exception.h>
 #include <Common/logger_useful.h>
@@ -80,8 +81,12 @@ void ZooKeeper::init(ZooKeeperArgs args_)
                 if (secure)
                     host_string.erase(0, strlen("secure://"));
 
-                LOG_TEST(log, "Adding ZooKeeper host {} ({})", host_string, Poco::Net::SocketAddress{host_string}.toString());
-                nodes.emplace_back(Coordination::ZooKeeper::Node{Poco::Net::SocketAddress{host_string}, secure});
+                /// We want to resolve all hosts without DNS cache for keeper connection.
+                Coordination::DNSResolver::instance().removeHostFromCache(host_string);
+
+                auto address = Coordination::DNSResolver::instance().resolveAddress(host_string);
+                LOG_TEST(log, "Adding ZooKeeper host {} ({})", host_string, address.toString());
+                nodes.emplace_back(Coordination::ZooKeeper::Node{address, secure});
             }
             catch (const Poco::Net::HostNotFoundException & e)
             {

From bbf0548007432dc5482cd28fda4c31e57dd5c24f Mon Sep 17 00:00:00 2001
From: flynn <fenglv15@mails.ucas.ac.cn>
Date: Tue, 13 Jun 2023 02:48:28 +0000
Subject: [PATCH 014/242] fix test

---
 tests/queries/0_stateless/01945_show_debug_warning.expect | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/queries/0_stateless/01945_show_debug_warning.expect b/tests/queries/0_stateless/01945_show_debug_warning.expect
index 617e54a375e..9a8e22aa26f 100755
--- a/tests/queries/0_stateless/01945_show_debug_warning.expect
+++ b/tests/queries/0_stateless/01945_show_debug_warning.expect
@@ -55,7 +55,7 @@ expect eof
 
 spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion --max_memory_usage_for_all_queries=123 --history_file=$history_file"
 expect "Warnings:"
-expect " * Obsolete settings [`max_memory_usage_for_all_queries`] is changed. Please check \'select * from system.settings where changed and is_obsolete\' and read the changelog."
+expect " * Obsolete settings [\`max_memory_usage_for_all_queries\`] is changed. Please check \'select * from system.settings where changed and is_obsolete\' and read the changelog."
 expect ":) "
 send -- "q\r"
 expect eof

From 6ad6c6afa3bdf1cd95e1454bad9e7eb75db7b0ab Mon Sep 17 00:00:00 2001
From: flynn <fenglv15@mails.ucas.ac.cn>
Date: Tue, 13 Jun 2023 04:13:16 +0000
Subject: [PATCH 015/242] fix

---
 src/Interpreters/Context.cpp                              | 2 +-
 tests/queries/0_stateless/01945_show_debug_warning.expect | 2 +-
 tests/queries/0_stateless/01945_system_warnings.reference | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp
index 823c3d678df..1b8c52ee06b 100644
--- a/src/Interpreters/Context.cpp
+++ b/src/Interpreters/Context.cpp
@@ -783,7 +783,7 @@ Strings Context::getWarnings() const
     {
         if (setting.isValueChanged() && setting.isObsolete())
         {
-            res += (obsolete_settings_count ? ", `" : "`") + setting.getName() + "`";
+            res += (obsolete_settings_count ? ", '" : "'") + setting.getName() + "'";
             ++obsolete_settings_count;
         }
     }
diff --git a/tests/queries/0_stateless/01945_show_debug_warning.expect b/tests/queries/0_stateless/01945_show_debug_warning.expect
index 9a8e22aa26f..5315c56bde8 100755
--- a/tests/queries/0_stateless/01945_show_debug_warning.expect
+++ b/tests/queries/0_stateless/01945_show_debug_warning.expect
@@ -55,7 +55,7 @@ expect eof
 
 spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion --max_memory_usage_for_all_queries=123 --history_file=$history_file"
 expect "Warnings:"
-expect " * Obsolete settings [\`max_memory_usage_for_all_queries\`] is changed. Please check \'select * from system.settings where changed and is_obsolete\' and read the changelog."
+expect " * Obsolete settings [\'max_memory_usage_for_all_queries\'] is changed. Please check \'select * from system.settings where changed and is_obsolete\' and read the changelog."
 expect ":) "
 send -- "q\r"
 expect eof
diff --git a/tests/queries/0_stateless/01945_system_warnings.reference b/tests/queries/0_stateless/01945_system_warnings.reference
index 0c05d5d7049..dcb296c61aa 100644
--- a/tests/queries/0_stateless/01945_system_warnings.reference
+++ b/tests/queries/0_stateless/01945_system_warnings.reference
@@ -1,5 +1,5 @@
 Server was built in debug mode. It will work slowly.
 0
-Obsolete settings [`multiple_joins_rewriter_version`] is changed. Please check \'select * from system.settings where changed and is_obsolete\' and read the changelog.
+Obsolete settings [\'multiple_joins_rewriter_version\'] is changed. Please check \'select * from system.settings where changed and is_obsolete\' and read the changelog.
 1
 1

From 404bfe773ef726b63e944b70a8b4253907637b8c Mon Sep 17 00:00:00 2001
From: flynn <fenglv15@mails.ucas.ac.cn>
Date: Tue, 13 Jun 2023 06:28:47 +0000
Subject: [PATCH 016/242] fix

---
 tests/queries/0_stateless/01945_show_debug_warning.expect | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/queries/0_stateless/01945_show_debug_warning.expect b/tests/queries/0_stateless/01945_show_debug_warning.expect
index 5315c56bde8..9be0eb6e399 100755
--- a/tests/queries/0_stateless/01945_show_debug_warning.expect
+++ b/tests/queries/0_stateless/01945_show_debug_warning.expect
@@ -55,7 +55,7 @@ expect eof
 
 spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion --max_memory_usage_for_all_queries=123 --history_file=$history_file"
 expect "Warnings:"
-expect " * Obsolete settings [\'max_memory_usage_for_all_queries\'] is changed. Please check \'select * from system.settings where changed and is_obsolete\' and read the changelog."
+expect " * Obsolete settings"
 expect ":) "
 send -- "q\r"
 expect eof

From e7d1dfb704caa283174823ba8ff59b6c10ae0e1d Mon Sep 17 00:00:00 2001
From: flynn <fenglv15@mails.ucas.ac.cn>
Date: Tue, 13 Jun 2023 08:30:07 +0000
Subject: [PATCH 017/242] fix

---
 tests/queries/0_stateless/01945_system_warnings.sh | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/queries/0_stateless/01945_system_warnings.sh b/tests/queries/0_stateless/01945_system_warnings.sh
index 112baab614e..e44fe0ad6b5 100755
--- a/tests/queries/0_stateless/01945_system_warnings.sh
+++ b/tests/queries/0_stateless/01945_system_warnings.sh
@@ -1,4 +1,5 @@
 #!/usr/bin/env bash
+# Tags: no-parallel
 
 CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 # shellcheck source=../shell_config.sh

From 733e2905c74a85903ba0c50a9c5e20a002ab8c9a Mon Sep 17 00:00:00 2001
From: flynn <fenglv15@mails.ucas.ac.cn>
Date: Fri, 23 Jun 2023 14:29:49 +0000
Subject: [PATCH 018/242] update docs

---
 .../system-tables/merge_tree_settings.md      | 78 ++++++++++++-------
 .../system-tables/server_settings.md          | 25 ++++--
 docs/en/operations/system-tables/settings.md  | 14 ++--
 3 files changed, 76 insertions(+), 41 deletions(-)

diff --git a/docs/en/operations/system-tables/merge_tree_settings.md b/docs/en/operations/system-tables/merge_tree_settings.md
index d8539908bf7..b90026154f8 100644
--- a/docs/en/operations/system-tables/merge_tree_settings.md
+++ b/docs/en/operations/system-tables/merge_tree_settings.md
@@ -7,11 +7,17 @@ Contains information about settings for `MergeTree` tables.
 
 Columns:
 
-- `name` (String) — Setting name.
-- `value` (String) — Setting value.
-- `description` (String) — Setting description.
-- `type` (String) — Setting type (implementation specific string value).
-- `changed` (UInt8) — Whether the setting was explicitly defined in the config or explicitly changed.
+- `name` ([String](../../sql-reference/data-types/string.md)) — Setting name.
+- `value` ([String](../../sql-reference/data-types/string.md)) — Setting value.
+- `changed` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Whether the setting was explicitly defined in the config or explicitly changed.
+- `description` ([String](../../sql-reference/data-types/string.md)) — Setting description.
+- `min` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — Minimum value of the setting, if any is set via [constraints](../../operations/settings/constraints-on-settings.md#constraints-on-settings). If the setting has no minimum value, contains [NULL](../../sql-reference/syntax.md#null-literal).
+- `max` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — Maximum value of the setting, if any is set via [constraints](../../operations/settings/constraints-on-settings.md#constraints-on-settings). If the setting has no maximum value, contains [NULL](../../sql-reference/syntax.md#null-literal).
+- `readonly` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Shows whether the current user can change the setting:
+    - `0` — Current user can change the setting.
+    - `1` — Current user can’t change the setting.
+- `type` ([String](../../sql-reference/data-types/string.md)) — Setting type (implementation specific string value).
+- `is_obsolete` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Shows whether a setting is obsolete.
 
 **Example**
 ```sql
@@ -19,37 +25,53 @@ SELECT * FROM system.merge_tree_settings LIMIT 4 FORMAT Vertical;
 ```
 
 ```response
-Row 1:
+Row 1:                                                                                                                                                                                  
+──────                                                                                                                                                                                  
+name:        min_compress_block_size                                                                                                                                                    
+value:       0
+changed:     0
+description: When granule is written, compress the data in buffer if the size of pending uncompressed data is larger or equal than the specified threshold. If this setting is not set, the corresponding global setting is used.
+min:         ᴺᵁᴸᴸ
+max:         ᴺᵁᴸᴸ
+readonly:    0
+type:        UInt64
+is_obsolete: 0
+
+Row 2:
+──────
+name:        max_compress_block_size
+value:       0
+changed:     0
+description: Compress the pending uncompressed data in buffer if its size is larger or equal than the specified threshold. Block of data will be compressed even if the current granule is not finished. If this setting is not set, the corresponding global setting is used.
+min:         ᴺᵁᴸᴸ
+max:         ᴺᵁᴸᴸ
+readonly:    0
+type:        UInt64
+is_obsolete: 0
+
+Row 3:
 ──────
 name:        index_granularity
 value:       8192
 changed:     0
 description: How many rows correspond to one primary key value.
-type:        SettingUInt64
-
-Row 2:
-──────
-name:        min_bytes_for_wide_part
-value:       0
-changed:     0
-description: Minimal uncompressed size in bytes to create part in wide format instead of compact
-type:        SettingUInt64
-
-Row 3:
-──────
-name:        min_rows_for_wide_part
-value:       0
-changed:     0
-description: Minimal number of rows to create part in wide format instead of compact
-type:        SettingUInt64
+min:         ᴺᵁᴸᴸ
+max:         ᴺᵁᴸᴸ
+readonly:    0
+type:        UInt64
+is_obsolete: 0
 
 Row 4:
 ──────
-name:        merge_max_block_size
-value:       8192
+name:        max_digestion_size_per_segment
+value:       268435456
 changed:     0
-description: How many rows in blocks should be formed for merge operations.
-type:        SettingUInt64
+description: Max number of bytes to digest per segment to build GIN index.
+min:         ᴺᵁᴸᴸ
+max:         ᴺᵁᴸᴸ
+readonly:    0
+type:        UInt64
+is_obsolete: 0
 
-4 rows in set. Elapsed: 0.001 sec.
+4 rows in set. Elapsed: 0.009 sec. 
 ```
diff --git a/docs/en/operations/system-tables/server_settings.md b/docs/en/operations/system-tables/server_settings.md
index 3085b1acaf4..df482261ae8 100644
--- a/docs/en/operations/system-tables/server_settings.md
+++ b/docs/en/operations/system-tables/server_settings.md
@@ -14,6 +14,7 @@ Columns:
 - `changed` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Shows whether a setting was specified in `config.xml`
 - `description` ([String](../../sql-reference/data-types/string.md)) — Short server setting description.
 - `type` ([String](../../sql-reference/data-types/string.md)) — Server setting value type.
+- `is_obsolete` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Shows whether a setting is obsolete.
 
 **Example**
 
@@ -26,14 +27,22 @@ WHERE name LIKE '%thread_pool%'
 ```
 
 ``` text
-┌─name─────────────────────────┬─value─┬─default─┬─changed─┬─description─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┬─type───┐
-│ max_thread_pool_size         │ 5000  │ 10000   │       1 │ The maximum number of threads that could be allocated from the OS and used for query execution and background operations.                           │ UInt64 │
-│ max_thread_pool_free_size    │ 1000  │ 1000    │       0 │ The maximum number of threads that will always stay in a global thread pool once allocated and remain idle in case of insufficient number of tasks. │ UInt64 │
-│ thread_pool_queue_size       │ 10000 │ 10000   │       0 │ The maximum number of tasks that will be placed in a queue and wait for execution.                                                                  │ UInt64 │
-│ max_io_thread_pool_size      │ 100   │ 100     │       0 │ The maximum number of threads that would be used for IO operations                                                                                  │ UInt64 │
-│ max_io_thread_pool_free_size │ 0     │ 0       │       0 │ Max free size for IO thread pool.                                                                                                                   │ UInt64 │
-│ io_thread_pool_queue_size    │ 10000 │ 10000   │       0 │ Queue size for IO thread pool.                                                                                                                      │ UInt64 │
-└──────────────────────────────┴───────┴─────────┴─────────┴─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┴────────┘
+┌─name────────────────────────────────────────┬─value─┬─default─┬─changed─┬─description──────────────────────────────────────────────────────────────────────────────────────────────────────
+───────────────────────────────────┬─type───┬─is_obsolete─┐
+│ max_thread_pool_size                        │ 10000 │ 10000   │       1 │ The maximum number of threads that could be allocated from the OS and used for query execution and background operations.                           │ UInt64 │           0 │
+│ max_thread_pool_free_size                   │ 1000  │ 1000    │       0 │ The maximum number of threads that will always stay in a global thread pool once allocated and remain idle in case of insufficient number of tasks. │ UInt64 │           0 │
+│ thread_pool_queue_size                      │ 10000 │ 10000   │       0 │ The maximum number of tasks that will be placed in a queue and wait for execution.                                                                  │ UInt64 │           0 │
+│ max_io_thread_pool_size                     │ 100   │ 100     │       0 │ The maximum number of threads that would be used for IO operations                                                                                  │ UInt64 │           0 │
+│ max_io_thread_pool_free_size                │ 0     │ 0       │       0 │ Max free size for IO thread pool.                                                                                                                   │ UInt64 │           0 │
+│ io_thread_pool_queue_size                   │ 10000 │ 10000   │       0 │ Queue size for IO thread pool.                                                                                                                      │ UInt64 │           0 │
+│ max_active_parts_loading_thread_pool_size   │ 64    │ 64      │       0 │ The number of threads to load active set of data parts (Active ones) at startup.                                                                    │ UInt64 │           0 │
+│ max_outdated_parts_loading_thread_pool_size │ 32    │ 32      │       0 │ The number of threads to load inactive set of data parts (Outdated ones) at startup.                                                                │ UInt64 │           0 │
+│ max_parts_cleaning_thread_pool_size         │ 128   │ 128     │       0 │ The number of threads for concurrent removal of inactive data parts.                                                                                │ UInt64 │           0 │
+│ max_backups_io_thread_pool_size             │ 1000  │ 1000    │       0 │ The maximum number of threads that would be used for IO operations for BACKUP queries                                                               │ UInt64 │           0 │
+│ max_backups_io_thread_pool_free_size        │ 0     │ 0       │       0 │ Max free size for backups IO thread pool.                                                                                                           │ UInt64 │           0 │
+│ backups_io_thread_pool_queue_size           │ 0     │ 0       │       0 │ Queue size for backups IO thread pool.                                                                                                              │ UInt64 │           0 │
+└─────────────────────────────────────────────┴───────┴─────────┴─────────┴──────────────────────────────────────────────────────────────────────────────────────────────────────────────────
+───────────────────────────────────┴────────┴─────────────┘
 ```
 
 Using of `WHERE changed` can be useful, for example, when you want to check 
diff --git a/docs/en/operations/system-tables/settings.md b/docs/en/operations/system-tables/settings.md
index afae45077cc..7dd2345a2d0 100644
--- a/docs/en/operations/system-tables/settings.md
+++ b/docs/en/operations/system-tables/settings.md
@@ -17,6 +17,7 @@ Columns:
     - `0` — Current user can change the setting.
     - `1` — Current user can’t change the setting.
 - `default` ([String](../../sql-reference/data-types/string.md)) — Setting default value.
+- `is_obsolete` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Shows whether a setting is obsolete.
 
 **Example**
 
@@ -29,11 +30,14 @@ WHERE name LIKE '%min_i%'
 ```
 
 ``` text
-┌─name────────────────────────────────────────┬─value─────┬─changed─┬─description───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┬─min──┬─max──┬─readonly─┐
-│ min_insert_block_size_rows                  │ 1048576   │       0 │ Squash blocks passed to INSERT query to specified size in rows, if blocks are not big enough.                                                                         │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │        0 │
-│ min_insert_block_size_bytes                 │ 268435456 │       0 │ Squash blocks passed to INSERT query to specified size in bytes, if blocks are not big enough.                                                                        │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │        0 │
-│ read_backoff_min_interval_between_events_ms │ 1000      │       0 │ Settings to reduce the number of threads in case of slow reads. Do not pay attention to the event, if the previous one has passed less than a certain amount of time. │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │        0 │
-└─────────────────────────────────────────────┴───────────┴─────────┴───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┴──────┴──────┴──────────┘
+┌─name───────────────────────────────────────────────┬─value─────┬─changed─┬─description───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┬─min──┬─max──┬─readonly─┬─type─────────┬─default───┬─alias_for─┬─is_obsolete─┐
+│ min_insert_block_size_rows                         │ 1048449   │       0 │ Squash blocks passed to INSERT query to specified size in rows, if blocks are not big enough.                                                                         │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │        0 │ UInt64       │ 1048449   │           │           0 │
+│ min_insert_block_size_bytes                        │ 268402944 │       0 │ Squash blocks passed to INSERT query to specified size in bytes, if blocks are not big enough.                                                                        │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │        0 │ UInt64       │ 268402944 │           │           0 │
+│ min_insert_block_size_rows_for_materialized_views  │ 0         │       0 │ Like min_insert_block_size_rows, but applied only during pushing to MATERIALIZED VIEW (default: min_insert_block_size_rows)                                           │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │        0 │ UInt64       │ 0         │           │           0 │
+│ min_insert_block_size_bytes_for_materialized_views │ 0         │       0 │ Like min_insert_block_size_bytes, but applied only during pushing to MATERIALIZED VIEW (default: min_insert_block_size_bytes)                                         │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │        0 │ UInt64       │ 0         │           │           0 │
+│ read_backoff_min_interval_between_events_ms        │ 1000      │       0 │ Settings to reduce the number of threads in case of slow reads. Do not pay attention to the event, if the previous one has passed less than a certain amount of time. │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │        0 │ Milliseconds │ 1000      │           │           0 │
+└────────────────────────────────────────────────────┴───────────┴─────────┴─────────────────────────────────────────────────────────────────────────────────────────────────────────────────
+──────────────────────────────────────────────────────┴──────┴──────┴──────────┴──────────────┴───────────┴───────────┴─────────────┘
 ```
 
 Using of `WHERE changed` can be useful, for example, when you want to check:

From 991d95b1e1cd86da4b78fa4c0976b80bcd3800ac Mon Sep 17 00:00:00 2001
From: flynn <fenglv15@mails.ucas.ac.cn>
Date: Fri, 23 Jun 2023 14:32:08 +0000
Subject: [PATCH 019/242] update docs

---
 docs/en/operations/system-tables/merge_tree_settings.md | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/docs/en/operations/system-tables/merge_tree_settings.md b/docs/en/operations/system-tables/merge_tree_settings.md
index b90026154f8..557835ce3b6 100644
--- a/docs/en/operations/system-tables/merge_tree_settings.md
+++ b/docs/en/operations/system-tables/merge_tree_settings.md
@@ -25,9 +25,9 @@ SELECT * FROM system.merge_tree_settings LIMIT 4 FORMAT Vertical;
 ```
 
 ```response
-Row 1:                                                                                                                                                                                  
-──────                                                                                                                                                                                  
-name:        min_compress_block_size                                                                                                                                                    
+Row 1:
+──────
+name:        min_compress_block_size
 value:       0
 changed:     0
 description: When granule is written, compress the data in buffer if the size of pending uncompressed data is larger or equal than the specified threshold. If this setting is not set, the corresponding global setting is used.
@@ -73,5 +73,5 @@ readonly:    0
 type:        UInt64
 is_obsolete: 0
 
-4 rows in set. Elapsed: 0.009 sec. 
+4 rows in set. Elapsed: 0.009 sec.
 ```

From 57ca8b60e0e060fdaedfd6617de1b5b6e05b11e4 Mon Sep 17 00:00:00 2001
From: flynn <fenglv15@mails.ucas.ac.cn>
Date: Sat, 24 Jun 2023 03:06:20 +0000
Subject: [PATCH 020/242] Fix

---
 src/Interpreters/Context.cpp | 54 +++++++++++++++++++-----------------
 1 file changed, 29 insertions(+), 25 deletions(-)

diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp
index 1b8c52ee06b..2b0cc4e70d2 100644
--- a/src/Interpreters/Context.cpp
+++ b/src/Interpreters/Context.cpp
@@ -777,20 +777,27 @@ Strings Context::getWarnings() const
         auto lock = getLock();
         common_warnings = shared->warnings;
     }
-    String res = "Obsolete settings [";
-    size_t obsolete_settings_count = 0;
+    /// Make setting's name ordered
+    std::set<String> obsolete_settings;
     for (const auto & setting : settings)
     {
         if (setting.isValueChanged() && setting.isObsolete())
-        {
-            res += (obsolete_settings_count ? ", '" : "'") + setting.getName() + "'";
-            ++obsolete_settings_count;
-        }
+            obsolete_settings.emplace(setting.getName());
     }
 
-    if (obsolete_settings_count)
+    if (!obsolete_settings.empty())
     {
-        res = res + "]" + (obsolete_settings_count == 1 ? " is" : " are")
+        bool single_element = obsolete_settings.size() == 1;
+        String res = single_element ? "Obsolete setting [" : "Obsolete settings [";
+
+        bool first = true;
+        for (const auto & setting : obsolete_settings)
+        {
+            res += first ? "" : ", ";
+            res += "'" + setting + "'";
+            first = false;
+        }
+        res = res + "]" + (single_element ? " is" : " are")
             + " changed. "
               "Please check 'select * from system.settings where changed and is_obsolete' and read the changelog.";
         common_warnings.emplace_back(res);
@@ -2156,9 +2163,9 @@ BackupsWorker & Context::getBackupsWorker() const
     const bool allow_concurrent_restores = this->getConfigRef().getBool("backups.allow_concurrent_restores", true);
 
     const auto & config = getConfigRef();
-    const auto & settings_ = getSettingsRef();
-    UInt64 backup_threads = config.getUInt64("backup_threads", settings_.backup_threads);
-    UInt64 restore_threads = config.getUInt64("restore_threads", settings_.restore_threads);
+    const auto & settings_ref = getSettingsRef();
+    UInt64 backup_threads = config.getUInt64("backup_threads", settings_ref.backup_threads);
+    UInt64 restore_threads = config.getUInt64("restore_threads", settings_ref.restore_threads);
 
     if (!shared->backups_worker)
         shared->backups_worker.emplace(backup_threads, restore_threads, allow_concurrent_backups, allow_concurrent_restores);
@@ -4296,10 +4303,10 @@ ReadSettings Context::getReadSettings() const
 
 ReadSettings Context::getBackupReadSettings() const
 {
-    ReadSettings settings_ = getReadSettings();
-    settings_.remote_throttler = getBackupsThrottler();
-    settings_.local_throttler = getBackupsThrottler();
-    return settings_;
+    ReadSettings read_settings = getReadSettings();
+    read_settings.remote_throttler = getBackupsThrottler();
+    read_settings.local_throttler = getBackupsThrottler();
+    return read_settings;
 }
 
 WriteSettings Context::getWriteSettings() const
@@ -4328,14 +4335,13 @@ std::shared_ptr<AsyncReadCounters> Context::getAsyncReadCounters() const
 
 Context::ParallelReplicasMode Context::getParallelReplicasMode() const
 {
-    const auto & settings_ = getSettingsRef();
+    const auto & settings_ref = getSettingsRef();
 
     using enum Context::ParallelReplicasMode;
-    if (!settings_.parallel_replicas_custom_key.value.empty())
+    if (!settings_ref.parallel_replicas_custom_key.value.empty())
         return CUSTOM_KEY;
 
-    if (settings_.allow_experimental_parallel_reading_from_replicas > 0
-        && !settings_.use_hedged_requests)
+    if (settings_ref.allow_experimental_parallel_reading_from_replicas > 0 && !settings_ref.use_hedged_requests)
         return READ_TASKS;
 
     return SAMPLE_KEY;
@@ -4343,17 +4349,15 @@ Context::ParallelReplicasMode Context::getParallelReplicasMode() const
 
 bool Context::canUseParallelReplicasOnInitiator() const
 {
-    const auto & settings_ = getSettingsRef();
-    return getParallelReplicasMode() == ParallelReplicasMode::READ_TASKS
-        && settings_.max_parallel_replicas > 1
+    const auto & settings_ref = getSettingsRef();
+    return getParallelReplicasMode() == ParallelReplicasMode::READ_TASKS && settings_ref.max_parallel_replicas > 1
         && !getClientInfo().collaborate_with_initiator;
 }
 
 bool Context::canUseParallelReplicasOnFollower() const
 {
-    const auto & settings_ = getSettingsRef();
-    return getParallelReplicasMode() == ParallelReplicasMode::READ_TASKS
-        && settings_.max_parallel_replicas > 1
+    const auto & settings_ref = getSettingsRef();
+    return getParallelReplicasMode() == ParallelReplicasMode::READ_TASKS && settings_ref.max_parallel_replicas > 1
         && getClientInfo().collaborate_with_initiator;
 }
 

From f52a1159a2748e73dda2c6f897b3b1ee06d5a147 Mon Sep 17 00:00:00 2001
From: flynn <fenglv15@mails.ucas.ac.cn>
Date: Sat, 24 Jun 2023 03:52:32 +0000
Subject: [PATCH 021/242] fix test

---
 tests/queries/0_stateless/01945_show_debug_warning.expect | 2 +-
 tests/queries/0_stateless/01945_system_warnings.reference | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/queries/0_stateless/01945_show_debug_warning.expect b/tests/queries/0_stateless/01945_show_debug_warning.expect
index 9be0eb6e399..28b114b5af4 100755
--- a/tests/queries/0_stateless/01945_show_debug_warning.expect
+++ b/tests/queries/0_stateless/01945_show_debug_warning.expect
@@ -55,7 +55,7 @@ expect eof
 
 spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion --max_memory_usage_for_all_queries=123 --history_file=$history_file"
 expect "Warnings:"
-expect " * Obsolete settings"
+expect " * Obsolete setting"
 expect ":) "
 send -- "q\r"
 expect eof
diff --git a/tests/queries/0_stateless/01945_system_warnings.reference b/tests/queries/0_stateless/01945_system_warnings.reference
index dcb296c61aa..cfec2f63816 100644
--- a/tests/queries/0_stateless/01945_system_warnings.reference
+++ b/tests/queries/0_stateless/01945_system_warnings.reference
@@ -1,5 +1,5 @@
 Server was built in debug mode. It will work slowly.
 0
-Obsolete settings [\'multiple_joins_rewriter_version\'] is changed. Please check \'select * from system.settings where changed and is_obsolete\' and read the changelog.
+Obsolete setting [\'multiple_joins_rewriter_version\'] is changed. Please check \'select * from system.settings where changed and is_obsolete\' and read the changelog.
 1
 1

From 550907c432c86f35d9d2dc56e6fefe1d43717c13 Mon Sep 17 00:00:00 2001
From: flynn <fenglv15@mails.ucas.ac.cn>
Date: Sat, 24 Jun 2023 05:14:01 +0000
Subject: [PATCH 022/242] see output of flaky test

fix

fix
---
 tests/queries/0_stateless/01945_system_warnings.reference | 1 +
 tests/queries/0_stateless/01945_system_warnings.sh        | 1 +
 2 files changed, 2 insertions(+)

diff --git a/tests/queries/0_stateless/01945_system_warnings.reference b/tests/queries/0_stateless/01945_system_warnings.reference
index cfec2f63816..ce6195d5277 100644
--- a/tests/queries/0_stateless/01945_system_warnings.reference
+++ b/tests/queries/0_stateless/01945_system_warnings.reference
@@ -3,3 +3,4 @@ Server was built in debug mode. It will work slowly.
 Obsolete setting [\'multiple_joins_rewriter_version\'] is changed. Please check \'select * from system.settings where changed and is_obsolete\' and read the changelog.
 1
 1
+3
diff --git a/tests/queries/0_stateless/01945_system_warnings.sh b/tests/queries/0_stateless/01945_system_warnings.sh
index e44fe0ad6b5..26f04fbe31d 100755
--- a/tests/queries/0_stateless/01945_system_warnings.sh
+++ b/tests/queries/0_stateless/01945_system_warnings.sh
@@ -23,4 +23,5 @@ ${CLICKHOUSE_CLIENT} -q "SELECT count() = countDistinct(message) FROM system.war
 
 # Avoid too many warnings, especially in CI
 ${CLICKHOUSE_CLIENT} -q "SELECT count() < 5 FROM system.warnings"
+${CLICKHOUSE_CLIENT} -q "SELECT count() FROM system.warnings"
 

From 8a483f3347a1c39c8a8ee8dfb340501005e8a698 Mon Sep 17 00:00:00 2001
From: flynn <fenglv15@mails.ucas.ac.cn>
Date: Sat, 24 Jun 2023 13:08:58 +0000
Subject: [PATCH 023/242] fix

---
 tests/queries/0_stateless/01945_system_warnings.reference | 1 -
 tests/queries/0_stateless/01945_system_warnings.sh        | 3 +--
 2 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/tests/queries/0_stateless/01945_system_warnings.reference b/tests/queries/0_stateless/01945_system_warnings.reference
index ce6195d5277..cfec2f63816 100644
--- a/tests/queries/0_stateless/01945_system_warnings.reference
+++ b/tests/queries/0_stateless/01945_system_warnings.reference
@@ -3,4 +3,3 @@ Server was built in debug mode. It will work slowly.
 Obsolete setting [\'multiple_joins_rewriter_version\'] is changed. Please check \'select * from system.settings where changed and is_obsolete\' and read the changelog.
 1
 1
-3
diff --git a/tests/queries/0_stateless/01945_system_warnings.sh b/tests/queries/0_stateless/01945_system_warnings.sh
index 26f04fbe31d..4f11d63d3d0 100755
--- a/tests/queries/0_stateless/01945_system_warnings.sh
+++ b/tests/queries/0_stateless/01945_system_warnings.sh
@@ -22,6 +22,5 @@ ${CLICKHOUSE_CLIENT} --multiple_joins_rewriter_version=42 -q "SELECT message FRO
 ${CLICKHOUSE_CLIENT} -q "SELECT count() = countDistinct(message) FROM system.warnings"
 
 # Avoid too many warnings, especially in CI
-${CLICKHOUSE_CLIENT} -q "SELECT count() < 5 FROM system.warnings"
-${CLICKHOUSE_CLIENT} -q "SELECT count() FROM system.warnings"
+${CLICKHOUSE_CLIENT} -q "SELECT count() <= 5 FROM system.warnings"
 

From 207cd699579cb2da9589e30a050b0684777f415d Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@clickhouse.com>
Date: Sun, 25 Jun 2023 02:37:10 +0200
Subject: [PATCH 024/242] Remove wrong code

---
 src/Functions/FunctionsComparison.h           |  8 +---
 src/Functions/GregorianDate.h                 | 47 +++++++++++++------
 src/Functions/fromModifiedJulianDay.cpp       | 15 +-----
 src/Functions/getTypeSerializationStreams.cpp | 10 +---
 src/Functions/tupleHammingDistance.cpp        |  2 +-
 src/Functions/vectorFunctions.cpp             | 18 +++----
 6 files changed, 46 insertions(+), 54 deletions(-)

diff --git a/src/Functions/FunctionsComparison.h b/src/Functions/FunctionsComparison.h
index 66269f72866..3359aca73bd 100644
--- a/src/Functions/FunctionsComparison.h
+++ b/src/Functions/FunctionsComparison.h
@@ -1178,15 +1178,9 @@ public:
             || (left_tuple && right_tuple && left_tuple->getElements().size() == right_tuple->getElements().size())
             || (arguments[0]->equals(*arguments[1]))))
         {
-            try
-            {
-                getLeastSupertype(arguments);
-            }
-            catch (const Exception &)
-            {
+            if (!tryGetLeastSupertype(arguments))
                 throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal types of arguments ({}, {})"
                     " of function {}", arguments[0]->getName(), arguments[1]->getName(), getName());
-            }
         }
 
         if (left_tuple && right_tuple)
diff --git a/src/Functions/GregorianDate.h b/src/Functions/GregorianDate.h
index 63bc443fa31..31b3c8df0de 100644
--- a/src/Functions/GregorianDate.h
+++ b/src/Functions/GregorianDate.h
@@ -49,7 +49,15 @@ namespace DB
 
         /** Write the date in text form 'YYYY-MM-DD' to a buffer.
           */
-        void write(WriteBuffer & buf) const;
+        void write(WriteBuffer & buf) const
+        {
+            writeImpl<void>(buf);
+        }
+
+        bool tryWrite(WriteBuffer & buf) const
+        {
+            return writeImpl<bool>(buf);
+        }
 
         /** Convert to a string in text form 'YYYY-MM-DD'.
           */
@@ -65,15 +73,18 @@ namespace DB
             return month_;
         }
 
-        uint8_t day_of_month() const noexcept /// NOLINT
+        uint8_t dayOfMonth() const noexcept
         {
             return day_of_month_;
         }
 
     private:
-        YearT year_; /// NOLINT
-        uint8_t month_; /// NOLINT
-        uint8_t day_of_month_; /// NOLINT
+        YearT year_ = 0;
+        uint8_t month_ = 0;
+        uint8_t day_of_month_ = 0;
+
+        template <typename ReturnType>
+        ReturnType writeImpl(WriteBuffer & buf) const;
     };
 
     /** ISO 8601 Ordinal Date. YearT is an integral type which should
@@ -110,8 +121,8 @@ namespace DB
         }
 
     private:
-        YearT year_; /// NOLINT
-        uint16_t day_of_year_; /// NOLINT
+        YearT year_ = 0;
+        uint16_t day_of_year_ = 0;
     };
 
     class MonthDay
@@ -135,18 +146,17 @@ namespace DB
             return month_;
         }
 
-        uint8_t day_of_month() const noexcept /// NOLINT
+        uint8_t dayOfMonth() const noexcept
         {
             return day_of_month_;
         }
 
     private:
-        uint8_t month_; /// NOLINT
-        uint8_t day_of_month_; /// NOLINT
+        uint8_t month_ = 0;
+        uint8_t day_of_month_ = 0;
     };
 }
 
-/* Implementation */
 
 namespace gd
 {
@@ -258,9 +268,10 @@ namespace DB
     {
         const OrdinalDate<YearT> ord(modified_julian_day);
         const MonthDay md(gd::is_leap_year(ord.year()), ord.dayOfYear());
+
         year_       = ord.year();
         month_      = md.month();
-        day_of_month_ = md.day_of_month();
+        day_of_month_ = md.dayOfMonth();
     }
 
     template <typename YearT>
@@ -274,12 +285,16 @@ namespace DB
     }
 
     template <typename YearT>
-    void GregorianDate<YearT>::write(WriteBuffer & buf) const
+    template <typename ReturnType>
+    ReturnType GregorianDate<YearT>::writeImpl(WriteBuffer & buf) const
     {
         if (year_ < 0 || year_ > 9999)
         {
-            throw Exception(ErrorCodes::CANNOT_FORMAT_DATETIME,
-                "Impossible to stringify: year too big or small: {}", DB::toString(year_));
+            if constexpr (std::is_same_v<ReturnType, void>)
+                throw Exception(ErrorCodes::CANNOT_FORMAT_DATETIME,
+                    "Impossible to stringify: year too big or small: {}", DB::toString(year_));
+            else
+                return false;
         }
         else
         {
@@ -301,6 +316,8 @@ namespace DB
             writeChar('0' + d / 10, buf); d %= 10;
             writeChar('0' + d     , buf);
         }
+
+        return ReturnType(true); /// Success: tryWrite() must report true; for ReturnType = void this is just a discarded cast.
     }
 
     template <typename YearT>
diff --git a/src/Functions/fromModifiedJulianDay.cpp b/src/Functions/fromModifiedJulianDay.cpp
index 8e76bb27ff1..a7c2c04bf01 100644
--- a/src/Functions/fromModifiedJulianDay.cpp
+++ b/src/Functions/fromModifiedJulianDay.cpp
@@ -56,19 +56,8 @@ namespace DB
             {
                 if constexpr (nullOnErrors)
                 {
-                    try
-                    {
-                        const GregorianDate<> gd(vec_from[i]);
-                        gd.write(write_buffer);
-                        (*vec_null_map_to)[i] = false;
-                    }
-                    catch (const Exception & e)
-                    {
-                        if (e.code() == ErrorCodes::CANNOT_FORMAT_DATETIME)
-                            (*vec_null_map_to)[i] = true;
-                        else
-                            throw;
-                    }
+                    const GregorianDate<> gd(vec_from[i]);
+                    (*vec_null_map_to)[i] = !gd.tryWrite(write_buffer); /// Row is NULL exactly when the date could not be formatted.
                     writeChar(0, write_buffer);
                     offsets_to[i] = write_buffer.count();
                 }
diff --git a/src/Functions/getTypeSerializationStreams.cpp b/src/Functions/getTypeSerializationStreams.cpp
index 2b13f0f140d..da9fce70ee9 100644
--- a/src/Functions/getTypeSerializationStreams.cpp
+++ b/src/Functions/getTypeSerializationStreams.cpp
@@ -65,15 +65,7 @@ private:
         if (!arg_string)
             return argument.type;
 
-        try
-        {
-            DataTypePtr type = DataTypeFactory::instance().get(arg_string->getDataAt(0).toString());
-            return type;
-        }
-        catch (const DB::Exception &)
-        {
-            return argument.type;
-        }
+        return DataTypeFactory::instance().get(arg_string->getDataAt(0).toString());
     }
 };
 
diff --git a/src/Functions/tupleHammingDistance.cpp b/src/Functions/tupleHammingDistance.cpp
index adc063bfa81..6a78928c7da 100644
--- a/src/Functions/tupleHammingDistance.cpp
+++ b/src/Functions/tupleHammingDistance.cpp
@@ -86,7 +86,7 @@ public:
                 auto plus_elem = plus->build({left_type, right_type});
                 res_type = plus_elem->getResultType();
             }
-            catch (DB::Exception & e)
+            catch (Exception & e)
             {
                 e.addMessage("While executing function {} for tuple element {}", getName(), i);
                 throw;
diff --git a/src/Functions/vectorFunctions.cpp b/src/Functions/vectorFunctions.cpp
index db907af972d..d53d39e2f3b 100644
--- a/src/Functions/vectorFunctions.cpp
+++ b/src/Functions/vectorFunctions.cpp
@@ -95,7 +95,7 @@ public:
                 auto elem_func = func->build(ColumnsWithTypeAndName{left, right});
                 types[i] = elem_func->getResultType();
             }
-            catch (DB::Exception & e)
+            catch (Exception & e)
             {
                 e.addMessage("While executing function {} for tuple element {}", getName(), i);
                 throw;
@@ -181,7 +181,7 @@ public:
                 auto elem_negate = negate->build(ColumnsWithTypeAndName{cur});
                 types[i] = elem_negate->getResultType();
             }
-            catch (DB::Exception & e)
+            catch (Exception & e)
             {
                 e.addMessage("While executing function {} for tuple element {}", getName(), i);
                 throw;
@@ -258,7 +258,7 @@ public:
                 auto elem_func = func->build(ColumnsWithTypeAndName{cur, p_column});
                 types[i] = elem_func->getResultType();
             }
-            catch (DB::Exception & e)
+            catch (Exception & e)
             {
                 e.addMessage("While executing function {} for tuple element {}", getName(), i);
                 throw;
@@ -363,7 +363,7 @@ public:
                 auto plus_elem = plus->build({left_type, right_type});
                 res_type = plus_elem->getResultType();
             }
-            catch (DB::Exception & e)
+            catch (Exception & e)
             {
                 e.addMessage("While executing function {} for tuple element {}", getName(), i);
                 throw;
@@ -467,7 +467,7 @@ public:
                 auto plus_elem = plus->build({left, right});
                 res_type = plus_elem->getResultType();
             }
-            catch (DB::Exception & e)
+            catch (Exception & e)
             {
                 e.addMessage("While executing function {} for tuple element {}", getName(), i);
                 throw;
@@ -740,7 +740,7 @@ public:
                 auto plus_elem = plus->build({left_type, right_type});
                 res_type = plus_elem->getResultType();
             }
-            catch (DB::Exception & e)
+            catch (Exception & e)
             {
                 e.addMessage("While executing function {} for tuple element {}", getName(), i);
                 throw;
@@ -842,7 +842,7 @@ public:
                 auto plus_elem = plus->build({left_type, right_type});
                 res_type = plus_elem->getResultType();
             }
-            catch (DB::Exception & e)
+            catch (Exception & e)
             {
                 e.addMessage("While executing function {} for tuple element {}", getName(), i);
                 throw;
@@ -993,7 +993,7 @@ public:
                 auto max_elem = max->build({left_type, right_type});
                 res_type = max_elem->getResultType();
             }
-            catch (DB::Exception & e)
+            catch (Exception & e)
             {
                 e.addMessage("While executing function {} for tuple element {}", getName(), i);
                 throw;
@@ -1103,7 +1103,7 @@ public:
                 auto plus_elem = plus->build({left_type, right_type});
                 res_type = plus_elem->getResultType();
             }
-            catch (DB::Exception & e)
+            catch (Exception & e)
             {
                 e.addMessage("While executing function {} for tuple element {}", getName(), i);
                 throw;

From bd7913a2275feb9a2e7e4a01889b1ff7297d6c24 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@clickhouse.com>
Date: Sun, 25 Jun 2023 02:39:25 +0200
Subject: [PATCH 025/242] Delete a line

---
 src/Functions/tupleHammingDistance.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/Functions/tupleHammingDistance.cpp b/src/Functions/tupleHammingDistance.cpp
index 6a78928c7da..ffdf8c93f15 100644
--- a/src/Functions/tupleHammingDistance.cpp
+++ b/src/Functions/tupleHammingDistance.cpp
@@ -1,5 +1,4 @@
 #include <Columns/ColumnTuple.h>
-#include <Columns/ColumnVector.h>
 #include <DataTypes/DataTypeTuple.h>
 #include <DataTypes/DataTypesNumber.h>
 #include <Functions/FunctionFactory.h>

From 27f41869a92037efb3f5c8868d8c0710cfef5fdd Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@clickhouse.com>
Date: Sun, 25 Jun 2023 09:11:42 +0200
Subject: [PATCH 026/242] Remove code that I don't like

---
 src/Formats/ReadSchemaUtils.h                 |  4 +-
 src/Storages/MergeTree/MergeTreeData.cpp      | 30 +++++----
 src/Storages/MergeTree/MergeTreeData.h        |  2 +-
 .../MergeTree/MergeTreeDataMergerMutator.cpp  | 38 +++++------
 .../MergeTree/MergeTreeDataMergerMutator.h    | 10 +--
 .../MergeTree/ReplicatedMergeTreeQueue.cpp    | 63 +++++++-----------
 .../MergeTree/ReplicatedMergeTreeQueue.h      | 14 ++--
 src/Storages/StorageMergeTree.cpp             | 65 ++++++++-----------
 src/Storages/StorageMergeTree.h               |  6 +-
 src/Storages/StorageReplicatedMergeTree.cpp   | 13 ++--
 10 files changed, 106 insertions(+), 139 deletions(-)

diff --git a/src/Formats/ReadSchemaUtils.h b/src/Formats/ReadSchemaUtils.h
index 82fbb3f7c46..6b4c78a4ff6 100644
--- a/src/Formats/ReadSchemaUtils.h
+++ b/src/Formats/ReadSchemaUtils.h
@@ -9,14 +9,14 @@ namespace DB
 
 using ReadBufferIterator = std::function<std::unique_ptr<ReadBuffer>(ColumnsDescription &)>;
 
-/// Try to determine the schema of the data in specifying format.
+/// Try to determine the schema of the data in the specified format.
 /// For formats that have an external schema reader, it will
 /// use it and won't create a read buffer.
 /// For formats that have a schema reader from the data,
 /// read buffer will be created by the provided iterator and
 /// the schema will be extracted from the data. If schema reader
 /// couldn't determine the schema we will try the next read buffer
-/// from provided iterator if it makes sense. If format doesn't
+/// from the provided iterator if it makes sense. If the format doesn't
 /// have any schema reader or we couldn't determine the schema,
 /// an exception will be thrown.
 ColumnsDescription readSchemaFromFormat(
diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp
index e9c3a7f66ae..9dfb1b3371d 100644
--- a/src/Storages/MergeTree/MergeTreeData.cpp
+++ b/src/Storages/MergeTree/MergeTreeData.cpp
@@ -382,7 +382,7 @@ MergeTreeData::MergeTreeData(
     checkTTLExpressions(metadata_, metadata_);
 
     String reason;
-    if (!canUsePolymorphicParts(*settings, &reason) && !reason.empty())
+    if (!canUsePolymorphicParts(*settings, reason) && !reason.empty())
         LOG_WARNING(log, "{} Settings 'min_rows_for_wide_part'and 'min_bytes_for_wide_part' will be ignored.", reason);
 
 #if !USE_ROCKSDB
@@ -3319,7 +3319,7 @@ void MergeTreeData::checkAlterIsPossible(const AlterCommands & commands, Context
                 MergeTreeSettings copy = *getSettings();
                 copy.applyChange(changed_setting);
                 String reason;
-                if (!canUsePolymorphicParts(copy, &reason) && !reason.empty())
+                if (!canUsePolymorphicParts(copy, reason) && !reason.empty())
                     throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Can't change settings. Reason: {}", reason);
             }
 
@@ -3344,7 +3344,7 @@ void MergeTreeData::checkAlterIsPossible(const AlterCommands & commands, Context
                 auto copy = getDefaultSettings();
                 copy->applyChanges(new_changes);
                 String reason;
-                if (!canUsePolymorphicParts(*copy, &reason) && !reason.empty())
+                if (!canUsePolymorphicParts(*copy, reason) && !reason.empty())
                     throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Can't change settings. Reason: {}", reason);
             }
 
@@ -3386,8 +3386,9 @@ MergeTreeDataPartFormat MergeTreeData::choosePartFormat(size_t bytes_uncompresse
     using PartType = MergeTreeDataPartType;
     using PartStorageType = MergeTreeDataPartStorageType;
 
-     const auto settings = getSettings();
-     if (!canUsePolymorphicParts(*settings))
+    String out_reason;
+    const auto settings = getSettings();
+    if (!canUsePolymorphicParts(*settings, out_reason))
         return {PartType::Wide, PartStorageType::Full};
 
     auto satisfies = [&](const auto & min_bytes_for, const auto & min_rows_for)
@@ -7976,22 +7977,23 @@ bool MergeTreeData::partsContainSameProjections(const DataPartPtr & left, const
 
 bool MergeTreeData::canUsePolymorphicParts() const
 {
-    return canUsePolymorphicParts(*getSettings(), nullptr);
+    String unused;
+    return canUsePolymorphicParts(*getSettings(), unused);
 }
 
-bool MergeTreeData::canUsePolymorphicParts(const MergeTreeSettings & settings, String * out_reason) const
+bool MergeTreeData::canUsePolymorphicParts(const MergeTreeSettings & settings, String & out_reason) const
 {
     if (!canUseAdaptiveGranularity())
     {
-        if (out_reason && (settings.min_rows_for_wide_part != 0 || settings.min_bytes_for_wide_part != 0
+        if ((settings.min_rows_for_wide_part != 0 || settings.min_bytes_for_wide_part != 0
             || settings.min_rows_for_compact_part != 0 || settings.min_bytes_for_compact_part != 0))
         {
-            *out_reason = fmt::format(
-                    "Table can't create parts with adaptive granularity, but settings"
-                    " min_rows_for_wide_part = {}"
-                    ", min_bytes_for_wide_part = {}"
-                    ". Parts with non-adaptive granularity can be stored only in Wide (default) format.",
-                    settings.min_rows_for_wide_part, settings.min_bytes_for_wide_part);
+            out_reason = fmt::format(
+                "Table can't create parts with adaptive granularity, but settings"
+                " min_rows_for_wide_part = {}"
+                ", min_bytes_for_wide_part = {}"
+                ". Parts with non-adaptive granularity can be stored only in Wide (default) format.",
+                settings.min_rows_for_wide_part, settings.min_bytes_for_wide_part);
         }
 
         return false;
diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h
index b27392b355b..cefebe8fc58 100644
--- a/src/Storages/MergeTree/MergeTreeData.h
+++ b/src/Storages/MergeTree/MergeTreeData.h
@@ -1494,7 +1494,7 @@ private:
     /// Check selected parts for movements. Used by ALTER ... MOVE queries.
     CurrentlyMovingPartsTaggerPtr checkPartsForMove(const DataPartsVector & parts, SpacePtr space);
 
-    bool canUsePolymorphicParts(const MergeTreeSettings & settings, String * out_reason = nullptr) const;
+    bool canUsePolymorphicParts(const MergeTreeSettings & settings, String & out_reason) const;
 
     std::mutex write_ahead_log_mutex;
     WriteAheadLogPtr write_ahead_log;
diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp
index 29a1574b66e..5a9a00f1059 100644
--- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp
+++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp
@@ -136,7 +136,7 @@ SelectPartsDecision MergeTreeDataMergerMutator::selectPartsToMerge(
     const AllowedMergingPredicate & can_merge_callback,
     bool merge_with_ttl_allowed,
     const MergeTreeTransactionPtr & txn,
-    String * out_disable_reason,
+    String & out_disable_reason,
     const PartitionIdsHint * partitions_hint)
 {
     MergeTreeData::DataPartsVector data_parts = getDataPartsToSelectMergeFrom(txn, partitions_hint);
@@ -145,8 +145,7 @@ SelectPartsDecision MergeTreeDataMergerMutator::selectPartsToMerge(
 
     if (data_parts.empty())
     {
-        if (out_disable_reason)
-            *out_disable_reason = "There are no parts in the table";
+        out_disable_reason = "There are no parts in the table";
         return SelectPartsDecision::CANNOT_SELECT;
     }
 
@@ -154,8 +153,7 @@ SelectPartsDecision MergeTreeDataMergerMutator::selectPartsToMerge(
 
     if (info.parts_selected_precondition == 0)
     {
-        if (out_disable_reason)
-            *out_disable_reason = "No parts satisfy preconditions for merge";
+        out_disable_reason = "No parts satisfy preconditions for merge";
         return SelectPartsDecision::CANNOT_SELECT;
     }
 
@@ -179,8 +177,7 @@ SelectPartsDecision MergeTreeDataMergerMutator::selectPartsToMerge(
                 /*optimize_skip_merged_partitions=*/true);
     }
 
-    if (out_disable_reason)
-        *out_disable_reason = "There is no need to merge parts according to merge selector algorithm";
+    out_disable_reason = "There is no need to merge parts according to merge selector algorithm";
     return SelectPartsDecision::CANNOT_SELECT;
 }
 
@@ -197,7 +194,8 @@ MergeTreeDataMergerMutator::PartitionIdsHint MergeTreeDataMergerMutator::getPart
 
     auto metadata_snapshot = data.getInMemoryMetadataPtr();
 
-    MergeSelectingInfo info = getPossibleMergeRanges(data_parts, can_merge_callback, txn);
+    String out_reason;
+    MergeSelectingInfo info = getPossibleMergeRanges(data_parts, can_merge_callback, txn, out_reason);
 
     if (info.parts_selected_precondition == 0)
         return res;
@@ -227,7 +225,7 @@ MergeTreeDataMergerMutator::PartitionIdsHint MergeTreeDataMergerMutator::getPart
         /// This method should have been const, but something went wrong... it's const with dry_run = true
         auto status = const_cast<MergeTreeDataMergerMutator *>(this)->selectPartsToMergeFromRanges(
                 future_part, /*aggressive*/ false, max_total_size_to_merge, merge_with_ttl_allowed,
-                metadata_snapshot, ranges_per_partition[i], info.current_time, &out_disable_reason,
+                metadata_snapshot, ranges_per_partition[i], info.current_time, out_disable_reason,
                 /* dry_run */ true);
         if (status == SelectPartsDecision::SELECTED)
             res.insert(all_partition_ids[i]);
@@ -330,7 +328,7 @@ MergeTreeDataMergerMutator::MergeSelectingInfo MergeTreeDataMergerMutator::getPo
     const MergeTreeData::DataPartsVector & data_parts,
     const AllowedMergingPredicate & can_merge_callback,
     const MergeTreeTransactionPtr & txn,
-    String * out_disable_reason) const
+    String & out_disable_reason) const
 {
     MergeSelectingInfo res;
 
@@ -443,7 +441,7 @@ SelectPartsDecision MergeTreeDataMergerMutator::selectPartsToMergeFromRanges(
     const StorageMetadataPtr & metadata_snapshot,
     const IMergeSelector::PartsRanges & parts_ranges,
     const time_t & current_time,
-    String * out_disable_reason,
+    String & out_disable_reason,
     bool dry_run)
 {
     const auto data_settings = data.getSettings();
@@ -514,8 +512,7 @@ SelectPartsDecision MergeTreeDataMergerMutator::selectPartsToMergeFromRanges(
 
         if (parts_to_merge.empty())
         {
-            if (out_disable_reason)
-                *out_disable_reason = "Did not find any parts to merge (with usual merge selectors)";
+            out_disable_reason = "Did not find any parts to merge (with usual merge selectors)";
             return SelectPartsDecision::CANNOT_SELECT;
         }
     }
@@ -562,22 +559,20 @@ SelectPartsDecision MergeTreeDataMergerMutator::selectAllPartsToMergeWithinParti
     bool final,
     const StorageMetadataPtr & metadata_snapshot,
     const MergeTreeTransactionPtr & txn,
-    String * out_disable_reason,
+    String & out_disable_reason,
     bool optimize_skip_merged_partitions)
 {
     MergeTreeData::DataPartsVector parts = selectAllPartsFromPartition(partition_id);
 
     if (parts.empty())
     {
-        if (out_disable_reason)
-            *out_disable_reason = "There are no parts inside partition";
+        out_disable_reason = "There are no parts inside partition";
         return SelectPartsDecision::CANNOT_SELECT;
     }
 
     if (!final && parts.size() == 1)
     {
-        if (out_disable_reason)
-            *out_disable_reason = "There is only one part inside partition";
+        out_disable_reason = "There is only one part inside partition";
         return SelectPartsDecision::CANNOT_SELECT;
     }
 
@@ -586,8 +581,7 @@ SelectPartsDecision MergeTreeDataMergerMutator::selectAllPartsToMergeWithinParti
     if (final && optimize_skip_merged_partitions && parts.size() == 1 && parts[0]->info.level > 0 &&
         (!metadata_snapshot->hasAnyTTL() || parts[0]->checkAllTTLCalculated(metadata_snapshot)))
     {
-        if (out_disable_reason)
-            *out_disable_reason = "Partition skipped due to optimize_skip_merged_partitions";
+        out_disable_reason = "Partition skipped due to optimize_skip_merged_partitions";
         return SelectPartsDecision::NOTHING_TO_MERGE;
     }
 
@@ -628,9 +622,7 @@ SelectPartsDecision MergeTreeDataMergerMutator::selectAllPartsToMergeWithinParti
                 static_cast<int>((DISK_USAGE_COEFFICIENT_TO_SELECT - 1.0) * 100));
         }
 
-        if (out_disable_reason)
-            *out_disable_reason = fmt::format("Insufficient available disk space, required {}", ReadableSize(required_disk_space));
-
+        out_disable_reason = fmt::format("Insufficient available disk space, required {}", ReadableSize(required_disk_space));
         return SelectPartsDecision::CANNOT_SELECT;
     }
 
diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.h b/src/Storages/MergeTree/MergeTreeDataMergerMutator.h
index 428161ea71e..6eab0ee0c37 100644
--- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.h
+++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.h
@@ -43,7 +43,7 @@ public:
     using AllowedMergingPredicate = std::function<bool (const MergeTreeData::DataPartPtr &,
                                                         const MergeTreeData::DataPartPtr &,
                                                         const MergeTreeTransaction *,
-                                                        String *)>;
+                                                        String &)>;
 
     explicit MergeTreeDataMergerMutator(MergeTreeData & data_);
 
@@ -92,7 +92,7 @@ public:
         const MergeTreeData::DataPartsVector & data_parts,
         const AllowedMergingPredicate & can_merge_callback,
         const MergeTreeTransactionPtr & txn,
-        String * out_disable_reason = nullptr) const;
+        String & out_disable_reason) const;
 
     /// The third step of selecting parts to merge: takes ranges that we can merge, and selects parts that we want to merge
     SelectPartsDecision selectPartsToMergeFromRanges(
@@ -103,7 +103,7 @@ public:
         const StorageMetadataPtr & metadata_snapshot,
         const IMergeSelector::PartsRanges & parts_ranges,
         const time_t & current_time,
-        String * out_disable_reason = nullptr,
+        String & out_disable_reason,
         bool dry_run = false);
 
     String getBestPartitionToOptimizeEntire(const PartitionsInfo & partitions_info) const;
@@ -129,7 +129,7 @@ public:
         const AllowedMergingPredicate & can_merge,
         bool merge_with_ttl_allowed,
         const MergeTreeTransactionPtr & txn,
-        String * out_disable_reason = nullptr,
+        String & out_disable_reason,
         const PartitionIdsHint * partitions_hint = nullptr);
 
     /** Select all the parts in the specified partition for merge, if possible.
@@ -144,7 +144,7 @@ public:
         bool final,
         const StorageMetadataPtr & metadata_snapshot,
         const MergeTreeTransactionPtr & txn,
-        String * out_disable_reason = nullptr,
+        String & out_disable_reason,
         bool optimize_skip_merged_partitions = false);
 
     /** Creates a task to merge parts.
diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp
index 03ded2ef260..295e717c08c 100644
--- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp
+++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp
@@ -2197,7 +2197,7 @@ bool LocalMergePredicate::operator()(
     const MergeTreeData::DataPartPtr & left,
     const MergeTreeData::DataPartPtr & right,
     const MergeTreeTransaction *,
-    String * out_reason) const
+    String & out_reason) const
 {
     if (left)
         return canMergeTwoParts(left, right, out_reason);
@@ -2209,7 +2209,7 @@ bool ReplicatedMergeTreeMergePredicate::operator()(
     const MergeTreeData::DataPartPtr & left,
     const MergeTreeData::DataPartPtr & right,
     const MergeTreeTransaction *,
-    String * out_reason) const
+    String & out_reason) const
 {
     if (left)
         return canMergeTwoParts(left, right, out_reason);
@@ -2221,7 +2221,7 @@ bool ReplicatedMergeTreeMergePredicate::operator()(
 bool ReplicatedMergeTreeMergePredicate::canMergeTwoParts(
     const MergeTreeData::DataPartPtr & left,
     const MergeTreeData::DataPartPtr & right,
-    String * out_reason) const
+    String & out_reason) const
 {
     /// A sketch of a proof of why this method actually works:
     ///
@@ -2265,22 +2265,19 @@ bool ReplicatedMergeTreeMergePredicate::canMergeTwoParts(
     {
         if (pinned_part_uuids.part_uuids.contains(part->uuid))
         {
-            if (out_reason)
-                *out_reason = "Part " + part->name + " has uuid " + toString(part->uuid) + " which is currently pinned";
+            out_reason = "Part " + part->name + " has uuid " + toString(part->uuid) + " which is currently pinned";
             return false;
         }
 
         if (part->name == inprogress_quorum_part)
         {
-            if (out_reason)
-                *out_reason = "Quorum insert for part " + part->name + " is currently in progress";
+            out_reason = "Quorum insert for part " + part->name + " is currently in progress";
             return false;
         }
 
         if (prev_virtual_parts.getContainingPart(part->info).empty())
         {
-            if (out_reason)
-                *out_reason = "Entry for part " + part->name + " hasn't been read from the replication log yet";
+            out_reason = "Entry for part " + part->name + " hasn't been read from the replication log yet";
             return false;
         }
     }
@@ -2294,8 +2291,7 @@ bool ReplicatedMergeTreeMergePredicate::canMergeTwoParts(
     {
         if (partition_ids_hint && !partition_ids_hint->contains(left->info.partition_id))
         {
-            if (out_reason)
-                *out_reason = fmt::format("Uncommitted block were not loaded for unexpected partition {}", left->info.partition_id);
+            out_reason = fmt::format("Uncommitted block were not loaded for unexpected partition {}", left->info.partition_id);
             return false;
         }
 
@@ -2307,10 +2303,8 @@ bool ReplicatedMergeTreeMergePredicate::canMergeTwoParts(
             auto block_it = block_numbers.upper_bound(left_max_block);
             if (block_it != block_numbers.end() && *block_it < right_min_block)
             {
-                if (out_reason)
-                    *out_reason = "Block number " + toString(*block_it) + " is still being inserted between parts "
-                        + left->name + " and " + right->name;
-
+                out_reason = "Block number " + toString(*block_it) + " is still being inserted between parts "
+                    + left->name + " and " + right->name;
                 return false;
             }
         }
@@ -2322,7 +2316,7 @@ bool ReplicatedMergeTreeMergePredicate::canMergeTwoParts(
 bool LocalMergePredicate::canMergeTwoParts(
     const MergeTreeData::DataPartPtr & left,
     const MergeTreeData::DataPartPtr & right,
-    String * out_reason) const
+    String & out_reason) const
 {
     Int64 left_max_block = left->info.max_block;
     Int64 right_min_block = right->info.min_block;
@@ -2336,8 +2330,7 @@ bool LocalMergePredicate::canMergeTwoParts(
         String containing_part = queue.virtual_parts.getContainingPart(part->info);
         if (containing_part != part->name)
         {
-            if (out_reason)
-                *out_reason = "Part " + part->name + " has already been assigned a merge into " + containing_part;
+            out_reason = "Part " + part->name + " has already been assigned a merge into " + containing_part;
             return false;
         }
     }
@@ -2354,10 +2347,9 @@ bool LocalMergePredicate::canMergeTwoParts(
         Strings covered = queue.virtual_parts.getPartsCoveredBy(gap_part_info);
         if (!covered.empty())
         {
-            if (out_reason)
-                *out_reason = "There are " + toString(covered.size()) + " parts (from " + covered.front()
-                    + " to " + covered.back() + ") that are still not present or being processed by "
-                    + " other background process on this replica between " + left->name + " and " + right->name;
+            out_reason = "There are " + toString(covered.size()) + " parts (from " + covered.front()
+                + " to " + covered.back() + ") that are still not present or being processed by "
+                + " other background process on this replica between " + left->name + " and " + right->name;
             return false;
         }
     }
@@ -2370,9 +2362,8 @@ bool LocalMergePredicate::canMergeTwoParts(
 
     if (left_mutation_ver != right_mutation_ver)
     {
-        if (out_reason)
-            *out_reason = "Current mutation versions of parts " + left->name + " and " + right->name + " differ: "
-                + toString(left_mutation_ver) + " and " + toString(right_mutation_ver) + " respectively";
+        out_reason = "Current mutation versions of parts " + left->name + " and " + right->name + " differ: "
+            + toString(left_mutation_ver) + " and " + toString(right_mutation_ver) + " respectively";
         return false;
     }
 
@@ -2381,33 +2372,30 @@ bool LocalMergePredicate::canMergeTwoParts(
 
 bool ReplicatedMergeTreeMergePredicate::canMergeSinglePart(
     const MergeTreeData::DataPartPtr & part,
-    String * out_reason) const
+    String & out_reason) const
 {
     if (pinned_part_uuids.part_uuids.contains(part->uuid))
     {
-        if (out_reason)
-            *out_reason = fmt::format("Part {} has uuid {} which is currently pinned", part->name, part->uuid);
+        out_reason = fmt::format("Part {} has uuid {} which is currently pinned", part->name, part->uuid);
         return false;
     }
 
     if (part->name == inprogress_quorum_part)
     {
-        if (out_reason)
-            *out_reason = fmt::format("Quorum insert for part {} is currently in progress", part->name);
+        out_reason = fmt::format("Quorum insert for part {} is currently in progress", part->name);
         return false;
     }
 
     if (prev_virtual_parts.getContainingPart(part->info).empty())
     {
-        if (out_reason)
-            *out_reason = fmt::format("Entry for part {} hasn't been read from the replication log yet", part->name);
+        out_reason = fmt::format("Entry for part {} hasn't been read from the replication log yet", part->name);
         return false;
     }
 
     return nested_pred.canMergeSinglePart(part, out_reason);
 }
 
-bool LocalMergePredicate::canMergeSinglePart(const MergeTreeData::DataPartPtr & part, String * out_reason) const
+bool LocalMergePredicate::canMergeSinglePart(const MergeTreeData::DataPartPtr & part, String & out_reason) const
 {
     std::lock_guard lock(queue.state_mutex);
 
@@ -2416,8 +2404,7 @@ bool LocalMergePredicate::canMergeSinglePart(const MergeTreeData::DataPartPtr &
     String containing_part = queue.virtual_parts.getContainingPart(part->info);
     if (containing_part != part->name)
     {
-        if (out_reason)
-            *out_reason = fmt::format("Part {} has already been assigned a merge into {}", part->name, containing_part);
+        out_reason = fmt::format("Part {} has already been assigned a merge into {}", part->name, containing_part);
         return false;
     }
 
@@ -2425,7 +2412,7 @@ bool LocalMergePredicate::canMergeSinglePart(const MergeTreeData::DataPartPtr &
 }
 
 
-bool ReplicatedMergeTreeMergePredicate::partParticipatesInReplaceRange(const MergeTreeData::DataPartPtr & part, String * out_reason) const
+bool ReplicatedMergeTreeMergePredicate::partParticipatesInReplaceRange(const MergeTreeData::DataPartPtr & part, String & out_reason) const
 {
     std::lock_guard lock(queue.state_mutex);
     for (const auto & entry : queue.queue)
@@ -2438,9 +2425,7 @@ bool ReplicatedMergeTreeMergePredicate::partParticipatesInReplaceRange(const Mer
             if (part->info.isDisjoint(MergeTreePartInfo::fromPartName(part_name, queue.format_version)))
                 continue;
 
-            if (out_reason)
-                *out_reason = fmt::format("Part {} participates in REPLACE_RANGE {} ({})", part_name, entry->new_part_name, entry->znode_name);
-
+            out_reason = fmt::format("Part {} participates in REPLACE_RANGE {} ({})", part_name, entry->new_part_name, entry->znode_name);
             return true;
         }
     }
diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h
index 79572e13963..26db5a05b45 100644
--- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h
+++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h
@@ -501,13 +501,13 @@ public:
     bool operator()(const MergeTreeData::DataPartPtr & left,
                     const MergeTreeData::DataPartPtr & right,
                     const MergeTreeTransaction * txn,
-                    String * out_reason = nullptr) const;
+                    String & out_reason) const;
 
     bool canMergeTwoParts(const MergeTreeData::DataPartPtr & left,
                           const MergeTreeData::DataPartPtr & right,
-                          String * out_reason = nullptr) const;
+                          String & out_reason) const;
 
-    bool canMergeSinglePart(const MergeTreeData::DataPartPtr & part, String * out_reason) const;
+    bool canMergeSinglePart(const MergeTreeData::DataPartPtr & part, String & out_reason) const;
 
 private:
     const ReplicatedMergeTreeQueue & queue;
@@ -523,23 +523,23 @@ public:
     bool operator()(const MergeTreeData::DataPartPtr & left,
                     const MergeTreeData::DataPartPtr & right,
                     const MergeTreeTransaction * txn,
-                    String * out_reason = nullptr) const;
+                    String & out_reason) const;
 
     /// Can we assign a merge with these two parts?
     /// (assuming that no merge was assigned after the predicate was constructed)
     /// If we can't and out_reason is not nullptr, set it to the reason why we can't merge.
     bool canMergeTwoParts(const MergeTreeData::DataPartPtr & left,
                           const MergeTreeData::DataPartPtr & right,
-                          String * out_reason = nullptr) const;
+                          String & out_reason) const;
 
     /// Can we assign a merge this part and some other part?
     /// For example a merge of a part and itself is needed for TTL.
     /// This predicate is checked for the first part of each range.
-    bool canMergeSinglePart(const MergeTreeData::DataPartPtr & part, String * out_reason) const;
+    bool canMergeSinglePart(const MergeTreeData::DataPartPtr & part, String & out_reason) const;
 
     /// Returns true if part is needed for some REPLACE_RANGE entry.
     /// We should not drop part in this case, because replication queue may stuck without that part.
-    bool partParticipatesInReplaceRange(const MergeTreeData::DataPartPtr & part, String * out_reason) const;
+    bool partParticipatesInReplaceRange(const MergeTreeData::DataPartPtr & part, String & out_reason) const;
 
     /// Return nonempty optional of desired mutation version and alter version.
     /// If we have no alter (modify/drop) mutations in mutations queue, than we return biggest possible
diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp
index c02c96f62be..c21b87c223d 100644
--- a/src/Storages/StorageMergeTree.cpp
+++ b/src/Storages/StorageMergeTree.cpp
@@ -839,7 +839,7 @@ MergeMutateSelectedEntryPtr StorageMergeTree::selectPartsToMerge(
     bool aggressive,
     const String & partition_id,
     bool final,
-    String * out_disable_reason,
+    String & out_disable_reason,
     TableLockHolder & /* table_lock_holder */,
     std::unique_lock<std::mutex> & lock,
     const MergeTreeTransactionPtr & txn,
@@ -857,7 +857,7 @@ MergeMutateSelectedEntryPtr StorageMergeTree::selectPartsToMerge(
     CurrentlyMergingPartsTaggerPtr merging_tagger;
     MergeList::EntryPtr merge_entry;
 
-    auto can_merge = [this, &lock](const DataPartPtr & left, const DataPartPtr & right, const MergeTreeTransaction * tx, String * disable_reason) -> bool
+    auto can_merge = [this, &lock](const DataPartPtr & left, const DataPartPtr & right, const MergeTreeTransaction * tx, String & disable_reason) -> bool
     {
         if (tx)
         {
@@ -866,8 +866,7 @@ MergeMutateSelectedEntryPtr StorageMergeTree::selectPartsToMerge(
             if ((left && !left->version.isVisible(tx->getSnapshot(), Tx::EmptyTID))
                     || (right && !right->version.isVisible(tx->getSnapshot(), Tx::EmptyTID)))
             {
-                if (disable_reason)
-                    *disable_reason = "Some part is not visible in transaction";
+                disable_reason = "Some part is not visible in transaction";
                 return false;
             }
 
@@ -875,8 +874,7 @@ MergeMutateSelectedEntryPtr StorageMergeTree::selectPartsToMerge(
             if ((left && left->version.isRemovalTIDLocked())
                     || (right && right->version.isRemovalTIDLocked()))
             {
-                if (disable_reason)
-                    *disable_reason = "Some part is locked for removal in another cuncurrent transaction";
+                disable_reason = "Some part is locked for removal in another cuncurrent transaction";
                 return false;
             }
         }
@@ -887,8 +885,7 @@ MergeMutateSelectedEntryPtr StorageMergeTree::selectPartsToMerge(
         {
             if (currently_merging_mutating_parts.contains(right))
             {
-                if (disable_reason)
-                    *disable_reason = "Some part currently in a merging or mutating process";
+                disable_reason = "Some part currently in a merging or mutating process";
                 return false;
             }
             else
@@ -897,30 +894,26 @@ MergeMutateSelectedEntryPtr StorageMergeTree::selectPartsToMerge(
 
         if (currently_merging_mutating_parts.contains(left) || currently_merging_mutating_parts.contains(right))
         {
-            if (disable_reason)
-                *disable_reason = "Some part currently in a merging or mutating process";
+            disable_reason = "Some part currently in a merging or mutating process";
             return false;
         }
 
         if (getCurrentMutationVersion(left, lock) != getCurrentMutationVersion(right, lock))
         {
-            if (disable_reason)
-                *disable_reason = "Some parts have differ mmutatuon version";
+            disable_reason = "Some parts have differ mmutatuon version";
             return false;
         }
 
         if (!partsContainSameProjections(left, right))
         {
-            if (disable_reason)
-                *disable_reason = "Some parts contains differ projections";
+            disable_reason = "Some parts contains differ projections";
             return false;
         }
 
         auto max_possible_level = getMaxLevelInBetween(left, right);
         if (max_possible_level > std::max(left->info.level, right->info.level))
         {
-            if (disable_reason)
-                *disable_reason = fmt::format("There is an outdated part in a gap between two active parts ({}, {}) with merge level {} higher than these active parts have", left->name, right->name, max_possible_level);
+            disable_reason = fmt::format("There is an outdated part in a gap between two active parts ({}, {}) with merge level {} higher than these active parts have", left->name, right->name, max_possible_level);
             return false;
         }
 
@@ -931,10 +924,9 @@ MergeMutateSelectedEntryPtr StorageMergeTree::selectPartsToMerge(
 
     if (!canEnqueueBackgroundTask())
     {
-        if (out_disable_reason)
-            *out_disable_reason = fmt::format("Current background tasks memory usage ({}) is more than the limit ({})",
-                formatReadableSizeWithBinarySuffix(background_memory_tracker.get()),
-                formatReadableSizeWithBinarySuffix(background_memory_tracker.getSoftLimit()));
+        out_disable_reason = fmt::format("Current background tasks memory usage ({}) is more than the limit ({})",
+            formatReadableSizeWithBinarySuffix(background_memory_tracker.get()),
+            formatReadableSizeWithBinarySuffix(background_memory_tracker.getSoftLimit()));
     }
     else if (partition_id.empty())
     {
@@ -955,8 +947,8 @@ MergeMutateSelectedEntryPtr StorageMergeTree::selectPartsToMerge(
                 txn,
                 out_disable_reason);
         }
-        else if (out_disable_reason)
-            *out_disable_reason = "Current value of max_source_parts_size is zero";
+        else
+            out_disable_reason = "Current value of max_source_parts_size is zero";
     }
     else
     {
@@ -970,15 +962,14 @@ MergeMutateSelectedEntryPtr StorageMergeTree::selectPartsToMerge(
             /// If final - we will wait for currently processing merges to finish and continue.
             if (final
                 && select_decision != SelectPartsDecision::SELECTED
-                && !currently_merging_mutating_parts.empty()
-                && out_disable_reason)
+                && !currently_merging_mutating_parts.empty())
             {
                 LOG_DEBUG(log, "Waiting for currently running merges ({} parts are merging right now) to perform OPTIMIZE FINAL",
                     currently_merging_mutating_parts.size());
 
                 if (std::cv_status::timeout == currently_processing_in_background_condition.wait_for(lock, timeout))
                 {
-                    *out_disable_reason = fmt::format("Timeout ({} ms) while waiting for already running merges before running OPTIMIZE with FINAL", timeout_ms);
+                    out_disable_reason = fmt::format("Timeout ({} ms) while waiting for already running merges before running OPTIMIZE with FINAL", timeout_ms);
                     break;
                 }
             }
@@ -994,14 +985,9 @@ MergeMutateSelectedEntryPtr StorageMergeTree::selectPartsToMerge(
 
     if (select_decision != SelectPartsDecision::SELECTED)
     {
-        if (out_disable_reason)
-        {
-            if (!out_disable_reason->empty())
-            {
-                *out_disable_reason += ". ";
-            }
-            *out_disable_reason += "Cannot select parts for optimization";
-        }
+        if (!out_disable_reason.empty())
+            out_disable_reason += ". ";
+        out_disable_reason += "Cannot select parts for optimization";
 
         return {};
     }
@@ -1022,7 +1008,7 @@ bool StorageMergeTree::merge(
     const Names & deduplicate_by_columns,
     bool cleanup,
     const MergeTreeTransactionPtr & txn,
-    String * out_disable_reason,
+    String & out_disable_reason,
     bool optimize_skip_merged_partitions)
 {
     auto table_lock_holder = lockForShare(RWLockImpl::NO_QUERY, getSettings()->lock_acquire_timeout_for_background_operations);
@@ -1077,7 +1063,7 @@ bool StorageMergeTree::partIsAssignedToBackgroundOperation(const DataPartPtr & p
 }
 
 MergeMutateSelectedEntryPtr StorageMergeTree::selectPartsToMutate(
-    const StorageMetadataPtr & metadata_snapshot, String * /* disable_reason */, TableLockHolder & /* table_lock_holder */,
+    const StorageMetadataPtr & metadata_snapshot, String & /* disable_reason */, TableLockHolder & /* table_lock_holder */,
     std::unique_lock<std::mutex> & /*currently_processing_in_background_mutex_lock*/)
 {
     if (current_mutations_by_version.empty())
@@ -1278,10 +1264,11 @@ bool StorageMergeTree::scheduleDataProcessingJob(BackgroundJobsAssignee & assign
         if (merger_mutator.merges_blocker.isCancelled())
             return false;
 
-        merge_entry = selectPartsToMerge(metadata_snapshot, false, {}, false, nullptr, shared_lock, lock, txn);
+        String out_reason;
+        merge_entry = selectPartsToMerge(metadata_snapshot, false, {}, false, out_reason, shared_lock, lock, txn);
 
         if (!merge_entry && !current_mutations_by_version.empty())
-            mutate_entry = selectPartsToMutate(metadata_snapshot, nullptr, shared_lock, lock);
+            mutate_entry = selectPartsToMutate(metadata_snapshot, out_reason, shared_lock, lock);
 
         has_mutations = !current_mutations_by_version.empty();
     }
@@ -1484,7 +1471,7 @@ bool StorageMergeTree::optimize(
                     deduplicate_by_columns,
                     cleanup,
                     txn,
-                    &disable_reason,
+                    disable_reason,
                     local_context->getSettingsRef().optimize_skip_merged_partitions))
             {
                 constexpr auto message = "Cannot OPTIMIZE table: {}";
@@ -1512,7 +1499,7 @@ bool StorageMergeTree::optimize(
                 deduplicate_by_columns,
                 cleanup,
                 txn,
-                &disable_reason,
+                disable_reason,
                 local_context->getSettingsRef().optimize_skip_merged_partitions))
         {
             constexpr auto message = "Cannot OPTIMIZE table: {}";
diff --git a/src/Storages/StorageMergeTree.h b/src/Storages/StorageMergeTree.h
index 8099f9c16aa..370283e650c 100644
--- a/src/Storages/StorageMergeTree.h
+++ b/src/Storages/StorageMergeTree.h
@@ -176,7 +176,7 @@ private:
             const Names & deduplicate_by_columns,
             bool cleanup,
             const MergeTreeTransactionPtr & txn,
-            String * out_disable_reason = nullptr,
+            String & out_disable_reason,
             bool optimize_skip_merged_partitions = false);
 
     void renameAndCommitEmptyParts(MutableDataPartsVector & new_parts, Transaction & transaction);
@@ -203,7 +203,7 @@ private:
         bool aggressive,
         const String & partition_id,
         bool final,
-        String * disable_reason,
+        String & disable_reason,
         TableLockHolder & table_lock_holder,
         std::unique_lock<std::mutex> & lock,
         const MergeTreeTransactionPtr & txn,
@@ -212,7 +212,7 @@ private:
 
 
     MergeMutateSelectedEntryPtr selectPartsToMutate(
-        const StorageMetadataPtr & metadata_snapshot, String * disable_reason,
+        const StorageMetadataPtr & metadata_snapshot, String & disable_reason,
         TableLockHolder & table_lock_holder, std::unique_lock<std::mutex> & currently_processing_in_background_mutex_lock);
 
     /// For current mutations queue, returns maximum version of mutation for a part,
diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp
index b1ba06c77f9..4298deb06dd 100644
--- a/src/Storages/StorageReplicatedMergeTree.cpp
+++ b/src/Storages/StorageReplicatedMergeTree.cpp
@@ -3438,9 +3438,10 @@ void StorageReplicatedMergeTree::mergeSelectingTask()
                 merge_pred.emplace(queue.getMergePredicate(zookeeper, partitions_to_merge_in));
         }
 
+        String out_reason;
         if (can_assign_merge &&
             merger_mutator.selectPartsToMerge(future_merged_part, false, max_source_parts_size_for_merge, *merge_pred,
-                                              merge_with_ttl_allowed, NO_TRANSACTION_PTR, nullptr, &partitions_to_merge_in) == SelectPartsDecision::SELECTED)
+                merge_with_ttl_allowed, NO_TRANSACTION_PTR, out_reason, &partitions_to_merge_in) == SelectPartsDecision::SELECTED)
         {
             create_result = createLogEntryToMergeParts(
                 zookeeper,
@@ -5169,13 +5170,13 @@ bool StorageReplicatedMergeTree::optimize(
             {
                 select_decision = merger_mutator.selectPartsToMerge(
                     future_merged_part, /* aggressive */ true, storage_settings_ptr->max_bytes_to_merge_at_max_space_in_pool,
-                    can_merge, /* merge_with_ttl_allowed */ false, NO_TRANSACTION_PTR, &disable_reason);
+                    can_merge, /* merge_with_ttl_allowed */ false, NO_TRANSACTION_PTR, disable_reason);
             }
             else
             {
                 select_decision = merger_mutator.selectAllPartsToMergeWithinPartition(
                     future_merged_part, can_merge, partition_id, final, metadata_snapshot, NO_TRANSACTION_PTR,
-                    &disable_reason, query_context->getSettingsRef().optimize_skip_merged_partitions);
+                    disable_reason, query_context->getSettingsRef().optimize_skip_merged_partitions);
             }
 
             /// If there is nothing to merge then we treat this merge as successful (needed for optimize final optimization)
@@ -7711,7 +7712,7 @@ void StorageReplicatedMergeTree::movePartitionToShard(
 
         /// canMergeSinglePart is overlapping with dropPart, let's try to use the same code.
         String out_reason;
-        if (!merge_pred.canMergeSinglePart(part, &out_reason))
+        if (!merge_pred.canMergeSinglePart(part, out_reason))
             throw Exception(ErrorCodes::PART_IS_TEMPORARILY_LOCKED, "Part is busy, reason: {}", out_reason);
     }
 
@@ -7959,14 +7960,14 @@ bool StorageReplicatedMergeTree::dropPartImpl(
         /// There isn't a lot we can do otherwise. Can't cancel merges because it is possible that a replica already
         /// finished the merge.
         String out_reason;
-        if (!merge_pred.canMergeSinglePart(part, &out_reason))
+        if (!merge_pred.canMergeSinglePart(part, out_reason))
         {
             if (throw_if_noop)
                 throw Exception::createDeprecated(out_reason, ErrorCodes::PART_IS_TEMPORARILY_LOCKED);
             return false;
         }
 
-        if (merge_pred.partParticipatesInReplaceRange(part, &out_reason))
+        if (merge_pred.partParticipatesInReplaceRange(part, out_reason))
         {
             if (throw_if_noop)
                 throw Exception::createDeprecated(out_reason, ErrorCodes::PART_IS_TEMPORARILY_LOCKED);

From 769169f820bffcf99539f654e01640e419582f92 Mon Sep 17 00:00:00 2001
From: taiyang-li <654010905@qq.com>
Date: Sun, 25 Jun 2023 18:24:02 +0800
Subject: [PATCH 027/242] fix heap overflow in read buffer from hdfs

---
 src/Storages/HDFS/ReadBufferFromHDFS.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Storages/HDFS/ReadBufferFromHDFS.cpp b/src/Storages/HDFS/ReadBufferFromHDFS.cpp
index ee8e0764db0..483f0894cc4 100644
--- a/src/Storages/HDFS/ReadBufferFromHDFS.cpp
+++ b/src/Storages/HDFS/ReadBufferFromHDFS.cpp
@@ -89,7 +89,7 @@ struct ReadBufferFromHDFS::ReadBufferFromHDFSImpl : public BufferWithOwnMemory<S
             if (read_until_position < file_offset)
                 throw Exception(ErrorCodes::LOGICAL_ERROR, "Attempt to read beyond right offset ({} > {})", file_offset, read_until_position - 1);
 
-            num_bytes_to_read = read_until_position - file_offset;
+            num_bytes_to_read = std::min<size_t>(read_until_position - file_offset, internal_buffer.size());
         }
         else
         {

From 1d2600f7068268802090b24d88d2c959325e7361 Mon Sep 17 00:00:00 2001
From: taiyang-li <654010905@qq.com>
Date: Mon, 26 Jun 2023 16:47:13 +0800
Subject: [PATCH 028/242] add example for test

---
 src/IO/examples/read_buffer_from_hdfs.cpp | 25 +++++++++++++++++++++++
 1 file changed, 25 insertions(+)
 create mode 100644 src/IO/examples/read_buffer_from_hdfs.cpp

diff --git a/src/IO/examples/read_buffer_from_hdfs.cpp b/src/IO/examples/read_buffer_from_hdfs.cpp
new file mode 100644
index 00000000000..07c56961db0
--- /dev/null
+++ b/src/IO/examples/read_buffer_from_hdfs.cpp
@@ -0,0 +1,25 @@
+#include <iostream>
+#include <memory>
+#include <string>
+#include <IO/WriteBufferFromFile.h>
+#include <IO/copyData.h>
+#include <Storages/HDFS/ReadBufferFromHDFS.h>
+#include <base/types.h>
+#include <Common/Config/ConfigProcessor.h>
+
+using namespace DB;
+
+int main()
+{
+    setenv("LIBHDFS3_CONF", "/data1/clickhouse_official/conf/hdfs-site.bigocluster.xml", true); /// NOLINT
+    String hdfs_uri = "hdfs://bigocluster";
+    String hdfs_file_path = "/data/hive/report_tb.db/bigolive_wj_pos_sdk_video_stats_event_allv1/day=2023-03-14/"
+                            "part-00014-272de29e-098c-4007-987a-f6b7ae740402-c000";
+    ConfigurationPtr config = Poco::AutoPtr(new Poco::Util::MapConfiguration());
+    ReadSettings read_settings;
+    ReadBufferFromHDFS read_buffer(hdfs_uri, hdfs_file_path, *config, read_settings, 625150306UL, false);
+
+    String download_path = "./download";
+    WriteBufferFromFile write_buffer(download_path);
+    copyData(read_buffer, write_buffer);
+}

From d208b0de3d0168b298cdc0410dc0bdaa8c33532c Mon Sep 17 00:00:00 2001
From: taiyang-li <654010905@qq.com>
Date: Mon, 26 Jun 2023 16:50:18 +0800
Subject: [PATCH 029/242] add example for test

---
 src/IO/examples/CMakeLists.txt            |  6 ++++++
 src/IO/examples/read_buffer_from_hdfs.cpp | 10 +++++-----
 2 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/src/IO/examples/CMakeLists.txt b/src/IO/examples/CMakeLists.txt
index b42aa1a4f96..12b85c483a1 100644
--- a/src/IO/examples/CMakeLists.txt
+++ b/src/IO/examples/CMakeLists.txt
@@ -73,3 +73,9 @@ target_link_libraries (snappy_read_buffer PRIVATE clickhouse_common_io)
 clickhouse_add_executable (hadoop_snappy_read_buffer hadoop_snappy_read_buffer.cpp)
 target_link_libraries (hadoop_snappy_read_buffer PRIVATE clickhouse_common_io)
 
+if (TARGET ch_contrib::hdfs)
+    clickhouse_add_executable (read_buffer_from_hdfs read_buffer_from_hdfs.cpp)
+    target_link_libraries (read_buffer_from_hdfs PRIVATE dbms ch_contrib::hdfs)
+endif ()
+
+
diff --git a/src/IO/examples/read_buffer_from_hdfs.cpp b/src/IO/examples/read_buffer_from_hdfs.cpp
index 07c56961db0..da4e5298681 100644
--- a/src/IO/examples/read_buffer_from_hdfs.cpp
+++ b/src/IO/examples/read_buffer_from_hdfs.cpp
@@ -11,15 +11,15 @@ using namespace DB;
 
 int main()
 {
-    setenv("LIBHDFS3_CONF", "/data1/clickhouse_official/conf/hdfs-site.bigocluster.xml", true); /// NOLINT
-    String hdfs_uri = "hdfs://bigocluster";
-    String hdfs_file_path = "/data/hive/report_tb.db/bigolive_wj_pos_sdk_video_stats_event_allv1/day=2023-03-14/"
-                            "part-00014-272de29e-098c-4007-987a-f6b7ae740402-c000";
+    setenv("LIBHDFS3_CONF", "/path/to/hdfs-site.xml", true); /// NOLINT
+    String hdfs_uri = "hdfs://cluster_name";
+    String hdfs_file_path = "/path/to/hdfs/file";
     ConfigurationPtr config = Poco::AutoPtr(new Poco::Util::MapConfiguration());
     ReadSettings read_settings;
-    ReadBufferFromHDFS read_buffer(hdfs_uri, hdfs_file_path, *config, read_settings, 625150306UL, false);
+    ReadBufferFromHDFS read_buffer(hdfs_uri, hdfs_file_path, *config, read_settings, 2097152UL, false);
 
     String download_path = "./download";
     WriteBufferFromFile write_buffer(download_path);
     copyData(read_buffer, write_buffer);
+    return 0;
 }

From 9ee0476d32262653d67e406a0946fa91c0bff451 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@clickhouse.com>
Date: Tue, 27 Jun 2023 11:59:01 +0300
Subject: [PATCH 030/242] Update src/Functions/GregorianDate.h

Co-authored-by: Antonio Andelic <antonio2368@users.noreply.github.com>
---
 src/Functions/GregorianDate.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Functions/GregorianDate.h b/src/Functions/GregorianDate.h
index 31b3c8df0de..16fcb5ea061 100644
--- a/src/Functions/GregorianDate.h
+++ b/src/Functions/GregorianDate.h
@@ -317,7 +317,7 @@ namespace DB
             writeChar('0' + d     , buf);
         }
 
-        return ReturnType();
+        return ReturnType(true);
     }
 
     template <typename YearT>

From 59928cb4856c5a82d3aeb402fef6936bfece3d85 Mon Sep 17 00:00:00 2001
From: Robert Schulze <robert@clickhouse.com>
Date: Tue, 4 Jul 2023 15:50:56 +0000
Subject: [PATCH 031/242] Docs: Fix description of output field NON_UNIQUE for
 statement SHOW INDEXES

---
 docs/en/sql-reference/statements/show.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/en/sql-reference/statements/show.md b/docs/en/sql-reference/statements/show.md
index f96eb55aa45..336b93db9d5 100644
--- a/docs/en/sql-reference/statements/show.md
+++ b/docs/en/sql-reference/statements/show.md
@@ -283,7 +283,7 @@ The optional keyword `EXTENDED` currently has no effect, it only exists for MySQ
 
 `SHOW INDEX` produces a result table with the following structure:
 - table - The name of the table (String)
-- non_unique - 0 if the index can contain duplicates, 1 otherwise (UInt8)
+- non_unique - 0 if the index cannot contain duplicates, 1 otherwise (UInt8)
 - key_name - The name of the index, `PRIMARY` if the index is a primary key index (String)
 - seq_in_index - Currently unused
 - column_name - Currently unused

From 047060f9a41589c3b0e19338ac03e0c89d076c87 Mon Sep 17 00:00:00 2001
From: Robert Schulze <robert@clickhouse.com>
Date: Tue, 4 Jul 2023 16:09:30 +0000
Subject: [PATCH 032/242] SHOW INDEX: Make fields COMMENT and INDEX_COMMENT
 more compatible with MySQL

---
 docs/en/sql-reference/statements/show.md      | 18 ++---
 .../InterpreterShowIndexesQuery.cpp           |  8 +-
 .../0_stateless/02724_show_indexes.reference  | 80 +++++++++----------
 3 files changed, 53 insertions(+), 53 deletions(-)

diff --git a/docs/en/sql-reference/statements/show.md b/docs/en/sql-reference/statements/show.md
index 336b93db9d5..38fa63b4e1c 100644
--- a/docs/en/sql-reference/statements/show.md
+++ b/docs/en/sql-reference/statements/show.md
@@ -205,7 +205,7 @@ The optional keyword `EXTENDED` currently has no effect, it only exists for MySQ
 
 The optional keyword `FULL` causes the output to include the collation, comment and privilege columns.
 
-`SHOW COLUMNS` produces a result table with the following structure:
+The statement produces a result table with the following structure:
 - field - The name of the column (String)
 - type - The column data type (String)
 - null - If the column data type is Nullable (UInt8)
@@ -281,7 +281,7 @@ equivalent. If no database is specified, the query assumes the current database
 
 The optional keyword `EXTENDED` currently has no effect, it only exists for MySQL compatibility.
 
-`SHOW INDEX` produces a result table with the following structure:
+The statement produces a result table with the following structure:
 - table - The name of the table (String)
 - non_unique - 0 if the index cannot contain duplicates, 1 otherwise (UInt8)
 - key_name - The name of the index, `PRIMARY` if the index is a primary key index (String)
@@ -293,8 +293,8 @@ The optional keyword `EXTENDED` currently has no effect, it only exists for MySQ
 - packed - Currently unused
 - null - Currently unused
 - index_type - The index type, e.g. `primary`, `minmax`, `bloom_filter` etc. (String)
-- comment - Currently unused
-- index_comment - Currently unused
+- comment - `` additional information about the index, currently always `` (empty string) (String)
+- index_comment - `` (empty string) because indexes in ClickHouse cannot have a `COMMENT` field like in MySQL (String)
 - visible - If the index is visible to the optimizer, always `YES` (String)
 - expression - The index expression (String)
 
@@ -310,11 +310,11 @@ Result:
 
 ``` text
 ┌─table─┬─non_unique─┬─key_name─┬─seq_in_index─┬─column_name─┬─collation─┬─cardinality─┬─sub_part─┬─packed─┬─null─┬─index_type───┬─comment─┬─index_comment─┬─visible─┬─expression─┐
-│ tbl   │          0 │ blf_idx  │ ᴺᵁᴸᴸ         │ ᴺᵁᴸᴸ        │ ᴺᵁᴸᴸ      │ ᴺᵁᴸᴸ        │ ᴺᵁᴸᴸ     │ ᴺᵁᴸᴸ   │ ᴺᵁᴸᴸ │ bloom_filter │ ᴺᵁᴸᴸ    │ ᴺᵁᴸᴸ          │ YES     │ d, b       │
-│ tbl   │          0 │ mm1_idx  │ ᴺᵁᴸᴸ         │ ᴺᵁᴸᴸ        │ ᴺᵁᴸᴸ      │ ᴺᵁᴸᴸ        │ ᴺᵁᴸᴸ     │ ᴺᵁᴸᴸ   │ ᴺᵁᴸᴸ │ minmax       │ ᴺᵁᴸᴸ    │ ᴺᵁᴸᴸ          │ YES     │ a, c, d    │
-│ tbl   │          0 │ mm2_idx  │ ᴺᵁᴸᴸ         │ ᴺᵁᴸᴸ        │ ᴺᵁᴸᴸ      │ ᴺᵁᴸᴸ        │ ᴺᵁᴸᴸ     │ ᴺᵁᴸᴸ   │ ᴺᵁᴸᴸ │ minmax       │ ᴺᵁᴸᴸ    │ ᴺᵁᴸᴸ          │ YES     │ c, d, e    │
-│ tbl   │          0 │ PRIMARY  │ ᴺᵁᴸᴸ         │ ᴺᵁᴸᴸ        │ A         │ ᴺᵁᴸᴸ        │ ᴺᵁᴸᴸ     │ ᴺᵁᴸᴸ   │ ᴺᵁᴸᴸ │ primary      │ ᴺᵁᴸᴸ    │ ᴺᵁᴸᴸ          │ YES     │ c, a       │
-│ tbl   │          0 │ set_idx  │ ᴺᵁᴸᴸ         │ ᴺᵁᴸᴸ        │ ᴺᵁᴸᴸ      │ ᴺᵁᴸᴸ        │ ᴺᵁᴸᴸ     │ ᴺᵁᴸᴸ   │ ᴺᵁᴸᴸ │ set          │ ᴺᵁᴸᴸ    │ ᴺᵁᴸᴸ          │ YES     │ e          │
+│ tbl   │          0 │ blf_idx  │ ᴺᵁᴸᴸ         │ ᴺᵁᴸᴸ        │ ᴺᵁᴸᴸ      │ ᴺᵁᴸᴸ        │ ᴺᵁᴸᴸ     │ ᴺᵁᴸᴸ   │ ᴺᵁᴸᴸ │ bloom_filter │         │               │ YES     │ d, b       │
+│ tbl   │          0 │ mm1_idx  │ ᴺᵁᴸᴸ         │ ᴺᵁᴸᴸ        │ ᴺᵁᴸᴸ      │ ᴺᵁᴸᴸ        │ ᴺᵁᴸᴸ     │ ᴺᵁᴸᴸ   │ ᴺᵁᴸᴸ │ minmax       │         │               │ YES     │ a, c, d    │
+│ tbl   │          0 │ mm2_idx  │ ᴺᵁᴸᴸ         │ ᴺᵁᴸᴸ        │ ᴺᵁᴸᴸ      │ ᴺᵁᴸᴸ        │ ᴺᵁᴸᴸ     │ ᴺᵁᴸᴸ   │ ᴺᵁᴸᴸ │ minmax       │         │               │ YES     │ c, d, e    │
+│ tbl   │          0 │ PRIMARY  │ ᴺᵁᴸᴸ         │ ᴺᵁᴸᴸ        │ A         │ ᴺᵁᴸᴸ        │ ᴺᵁᴸᴸ     │ ᴺᵁᴸᴸ   │ ᴺᵁᴸᴸ │ primary      │         │               │ YES     │ c, a       │
+│ tbl   │          0 │ set_idx  │ ᴺᵁᴸᴸ         │ ᴺᵁᴸᴸ        │ ᴺᵁᴸᴸ      │ ᴺᵁᴸᴸ        │ ᴺᵁᴸᴸ     │ ᴺᵁᴸᴸ   │ ᴺᵁᴸᴸ │ set          │         │               │ YES     │ e          │
 └───────┴────────────┴──────────┴──────────────┴─────────────┴───────────┴─────────────┴──────────┴────────┴──────┴──────────────┴─────────┴───────────────┴─────────┴────────────┘
 ```
 
diff --git a/src/Interpreters/InterpreterShowIndexesQuery.cpp b/src/Interpreters/InterpreterShowIndexesQuery.cpp
index 51311c82eeb..66a1b2941a3 100644
--- a/src/Interpreters/InterpreterShowIndexesQuery.cpp
+++ b/src/Interpreters/InterpreterShowIndexesQuery.cpp
@@ -50,8 +50,8 @@ FROM (
             NULL AS packed,
             NULL AS null,
             'primary' AS index_type,
-            NULL AS comment,
-            NULL AS index_comment,
+            '' AS comment,
+            '' AS index_comment,
             'YES' AS visible,
             primary_key AS expression
         FROM system.tables
@@ -71,8 +71,8 @@ FROM (
             NULL AS packed,
             NULL AS null,
             type AS index_type,
-            NULL AS comment,
-            NULL AS index_comment,
+            '' AS comment,
+            '' AS index_comment,
             'YES' AS visible,
             expr AS expression
         FROM system.data_skipping_indices
diff --git a/tests/queries/0_stateless/02724_show_indexes.reference b/tests/queries/0_stateless/02724_show_indexes.reference
index 8365ade3231..8d3d37eab04 100644
--- a/tests/queries/0_stateless/02724_show_indexes.reference
+++ b/tests/queries/0_stateless/02724_show_indexes.reference
@@ -1,47 +1,47 @@
 --- Aliases of SHOW INDEX
-tbl	0	blf_idx	\N	\N	\N	\N	\N	\N	\N	bloom_filter	\N	\N	YES	d, b
-tbl	0	mm1_idx	\N	\N	\N	\N	\N	\N	\N	minmax	\N	\N	YES	a, c, d
-tbl	0	mm2_idx	\N	\N	\N	\N	\N	\N	\N	minmax	\N	\N	YES	c, d, e
-tbl	0	PRIMARY	\N	\N	A	\N	\N	\N	\N	primary	\N	\N	YES	c, a
-tbl	0	set_idx	\N	\N	\N	\N	\N	\N	\N	set	\N	\N	YES	e
-tbl	0	blf_idx	\N	\N	\N	\N	\N	\N	\N	bloom_filter	\N	\N	YES	d, b
-tbl	0	mm1_idx	\N	\N	\N	\N	\N	\N	\N	minmax	\N	\N	YES	a, c, d
-tbl	0	mm2_idx	\N	\N	\N	\N	\N	\N	\N	minmax	\N	\N	YES	c, d, e
-tbl	0	PRIMARY	\N	\N	A	\N	\N	\N	\N	primary	\N	\N	YES	c, a
-tbl	0	set_idx	\N	\N	\N	\N	\N	\N	\N	set	\N	\N	YES	e
-tbl	0	blf_idx	\N	\N	\N	\N	\N	\N	\N	bloom_filter	\N	\N	YES	d, b
-tbl	0	mm1_idx	\N	\N	\N	\N	\N	\N	\N	minmax	\N	\N	YES	a, c, d
-tbl	0	mm2_idx	\N	\N	\N	\N	\N	\N	\N	minmax	\N	\N	YES	c, d, e
-tbl	0	PRIMARY	\N	\N	A	\N	\N	\N	\N	primary	\N	\N	YES	c, a
-tbl	0	set_idx	\N	\N	\N	\N	\N	\N	\N	set	\N	\N	YES	e
-tbl	0	blf_idx	\N	\N	\N	\N	\N	\N	\N	bloom_filter	\N	\N	YES	d, b
-tbl	0	mm1_idx	\N	\N	\N	\N	\N	\N	\N	minmax	\N	\N	YES	a, c, d
-tbl	0	mm2_idx	\N	\N	\N	\N	\N	\N	\N	minmax	\N	\N	YES	c, d, e
-tbl	0	PRIMARY	\N	\N	A	\N	\N	\N	\N	primary	\N	\N	YES	c, a
-tbl	0	set_idx	\N	\N	\N	\N	\N	\N	\N	set	\N	\N	YES	e
+tbl	0	blf_idx	\N	\N	\N	\N	\N	\N	\N	bloom_filter			YES	d, b
+tbl	0	mm1_idx	\N	\N	\N	\N	\N	\N	\N	minmax			YES	a, c, d
+tbl	0	mm2_idx	\N	\N	\N	\N	\N	\N	\N	minmax			YES	c, d, e
+tbl	0	PRIMARY	\N	\N	A	\N	\N	\N	\N	primary			YES	c, a
+tbl	0	set_idx	\N	\N	\N	\N	\N	\N	\N	set			YES	e
+tbl	0	blf_idx	\N	\N	\N	\N	\N	\N	\N	bloom_filter			YES	d, b
+tbl	0	mm1_idx	\N	\N	\N	\N	\N	\N	\N	minmax			YES	a, c, d
+tbl	0	mm2_idx	\N	\N	\N	\N	\N	\N	\N	minmax			YES	c, d, e
+tbl	0	PRIMARY	\N	\N	A	\N	\N	\N	\N	primary			YES	c, a
+tbl	0	set_idx	\N	\N	\N	\N	\N	\N	\N	set			YES	e
+tbl	0	blf_idx	\N	\N	\N	\N	\N	\N	\N	bloom_filter			YES	d, b
+tbl	0	mm1_idx	\N	\N	\N	\N	\N	\N	\N	minmax			YES	a, c, d
+tbl	0	mm2_idx	\N	\N	\N	\N	\N	\N	\N	minmax			YES	c, d, e
+tbl	0	PRIMARY	\N	\N	A	\N	\N	\N	\N	primary			YES	c, a
+tbl	0	set_idx	\N	\N	\N	\N	\N	\N	\N	set			YES	e
+tbl	0	blf_idx	\N	\N	\N	\N	\N	\N	\N	bloom_filter			YES	d, b
+tbl	0	mm1_idx	\N	\N	\N	\N	\N	\N	\N	minmax			YES	a, c, d
+tbl	0	mm2_idx	\N	\N	\N	\N	\N	\N	\N	minmax			YES	c, d, e
+tbl	0	PRIMARY	\N	\N	A	\N	\N	\N	\N	primary			YES	c, a
+tbl	0	set_idx	\N	\N	\N	\N	\N	\N	\N	set			YES	e
 --- EXTENDED
-tbl	0	blf_idx	\N	\N	\N	\N	\N	\N	\N	bloom_filter	\N	\N	YES	d, b
-tbl	0	mm1_idx	\N	\N	\N	\N	\N	\N	\N	minmax	\N	\N	YES	a, c, d
-tbl	0	mm2_idx	\N	\N	\N	\N	\N	\N	\N	minmax	\N	\N	YES	c, d, e
-tbl	0	PRIMARY	\N	\N	A	\N	\N	\N	\N	primary	\N	\N	YES	c, a
-tbl	0	set_idx	\N	\N	\N	\N	\N	\N	\N	set	\N	\N	YES	e
+tbl	0	blf_idx	\N	\N	\N	\N	\N	\N	\N	bloom_filter			YES	d, b
+tbl	0	mm1_idx	\N	\N	\N	\N	\N	\N	\N	minmax			YES	a, c, d
+tbl	0	mm2_idx	\N	\N	\N	\N	\N	\N	\N	minmax			YES	c, d, e
+tbl	0	PRIMARY	\N	\N	A	\N	\N	\N	\N	primary			YES	c, a
+tbl	0	set_idx	\N	\N	\N	\N	\N	\N	\N	set			YES	e
 --- WHERE
-tbl	0	mm1_idx	\N	\N	\N	\N	\N	\N	\N	minmax	\N	\N	YES	a, c, d
-tbl	0	mm2_idx	\N	\N	\N	\N	\N	\N	\N	minmax	\N	\N	YES	c, d, e
+tbl	0	mm1_idx	\N	\N	\N	\N	\N	\N	\N	minmax			YES	a, c, d
+tbl	0	mm2_idx	\N	\N	\N	\N	\N	\N	\N	minmax			YES	c, d, e
 --- Check with weird table names
-$4@^7	0	PRIMARY	\N	\N	A	\N	\N	\N	\N	primary	\N	\N	YES	c
-NULL	0	PRIMARY	\N	\N	A	\N	\N	\N	\N	primary	\N	\N	YES	c
-\'	0	PRIMARY	\N	\N	A	\N	\N	\N	\N	primary	\N	\N	YES	c
-\'	0	PRIMARY	\N	\N	A	\N	\N	\N	\N	primary	\N	\N	YES	c
+$4@^7	0	PRIMARY	\N	\N	A	\N	\N	\N	\N	primary			YES	c
+NULL	0	PRIMARY	\N	\N	A	\N	\N	\N	\N	primary			YES	c
+\'	0	PRIMARY	\N	\N	A	\N	\N	\N	\N	primary			YES	c
+\'	0	PRIMARY	\N	\N	A	\N	\N	\N	\N	primary			YES	c
 --- Original table
-tbl	0	blf_idx	\N	\N	\N	\N	\N	\N	\N	bloom_filter	\N	\N	YES	d, b
-tbl	0	mm1_idx	\N	\N	\N	\N	\N	\N	\N	minmax	\N	\N	YES	a, c, d
-tbl	0	mm2_idx	\N	\N	\N	\N	\N	\N	\N	minmax	\N	\N	YES	c, d, e
-tbl	0	PRIMARY	\N	\N	A	\N	\N	\N	\N	primary	\N	\N	YES	c, a
-tbl	0	set_idx	\N	\N	\N	\N	\N	\N	\N	set	\N	\N	YES	e
+tbl	0	blf_idx	\N	\N	\N	\N	\N	\N	\N	bloom_filter			YES	d, b
+tbl	0	mm1_idx	\N	\N	\N	\N	\N	\N	\N	minmax			YES	a, c, d
+tbl	0	mm2_idx	\N	\N	\N	\N	\N	\N	\N	minmax			YES	c, d, e
+tbl	0	PRIMARY	\N	\N	A	\N	\N	\N	\N	primary			YES	c, a
+tbl	0	set_idx	\N	\N	\N	\N	\N	\N	\N	set			YES	e
 --- Equally named table in other database
-tbl	0	mmi_idx	\N	\N	\N	\N	\N	\N	\N	minmax	\N	\N	YES	b
-tbl	0	PRIMARY	\N	\N	A	\N	\N	\N	\N	primary	\N	\N	YES	a
+tbl	0	mmi_idx	\N	\N	\N	\N	\N	\N	\N	minmax			YES	b
+tbl	0	PRIMARY	\N	\N	A	\N	\N	\N	\N	primary			YES	a
 --- Short form
-tbl	0	mmi_idx	\N	\N	\N	\N	\N	\N	\N	minmax	\N	\N	YES	b
-tbl	0	PRIMARY	\N	\N	A	\N	\N	\N	\N	primary	\N	\N	YES	a
+tbl	0	mmi_idx	\N	\N	\N	\N	\N	\N	\N	minmax			YES	b
+tbl	0	PRIMARY	\N	\N	A	\N	\N	\N	\N	primary			YES	a

From e9e3f87ed2ddc08f49a62ef3d880df203a3cd4e1 Mon Sep 17 00:00:00 2001
From: Robert Schulze <robert@clickhouse.com>
Date: Tue, 4 Jul 2023 16:19:33 +0000
Subject: [PATCH 033/242] SHOW INDEX: Make fields INDEX_TYPE more compatible
 with MySQL

---
 docs/en/sql-reference/statements/show.md      | 12 +--
 .../InterpreterShowIndexesQuery.cpp           |  4 +-
 .../0_stateless/02724_show_indexes.reference  | 78 +++++++++----------
 3 files changed, 46 insertions(+), 48 deletions(-)

diff --git a/docs/en/sql-reference/statements/show.md b/docs/en/sql-reference/statements/show.md
index 38fa63b4e1c..c73782efbbf 100644
--- a/docs/en/sql-reference/statements/show.md
+++ b/docs/en/sql-reference/statements/show.md
@@ -292,7 +292,7 @@ The statement produces a result table with the following structure:
 - sub_part - Currently unused
 - packed - Currently unused
 - null - Currently unused
-- index_type - The index type, e.g. `primary`, `minmax`, `bloom_filter` etc. (String)
+- index_type - The index type, e.g. `PRIMARY`, `MINMAX`, `BLOOM_FILTER` etc. (String)
 - comment - `` additional information about the index, currently always `` (empty string) (String)
 - index_comment - `` (empty string) because indexes in ClickHouse cannot have a `COMMENT` field like in MySQL (String)
 - visible - If the index is visible to the optimizer, always `YES` (String)
@@ -310,11 +310,11 @@ Result:
 
 ``` text
 ┌─table─┬─non_unique─┬─key_name─┬─seq_in_index─┬─column_name─┬─collation─┬─cardinality─┬─sub_part─┬─packed─┬─null─┬─index_type───┬─comment─┬─index_comment─┬─visible─┬─expression─┐
-│ tbl   │          0 │ blf_idx  │ ᴺᵁᴸᴸ         │ ᴺᵁᴸᴸ        │ ᴺᵁᴸᴸ      │ ᴺᵁᴸᴸ        │ ᴺᵁᴸᴸ     │ ᴺᵁᴸᴸ   │ ᴺᵁᴸᴸ │ bloom_filter │         │               │ YES     │ d, b       │
-│ tbl   │          0 │ mm1_idx  │ ᴺᵁᴸᴸ         │ ᴺᵁᴸᴸ        │ ᴺᵁᴸᴸ      │ ᴺᵁᴸᴸ        │ ᴺᵁᴸᴸ     │ ᴺᵁᴸᴸ   │ ᴺᵁᴸᴸ │ minmax       │         │               │ YES     │ a, c, d    │
-│ tbl   │          0 │ mm2_idx  │ ᴺᵁᴸᴸ         │ ᴺᵁᴸᴸ        │ ᴺᵁᴸᴸ      │ ᴺᵁᴸᴸ        │ ᴺᵁᴸᴸ     │ ᴺᵁᴸᴸ   │ ᴺᵁᴸᴸ │ minmax       │         │               │ YES     │ c, d, e    │
-│ tbl   │          0 │ PRIMARY  │ ᴺᵁᴸᴸ         │ ᴺᵁᴸᴸ        │ A         │ ᴺᵁᴸᴸ        │ ᴺᵁᴸᴸ     │ ᴺᵁᴸᴸ   │ ᴺᵁᴸᴸ │ primary      │         │               │ YES     │ c, a       │
-│ tbl   │          0 │ set_idx  │ ᴺᵁᴸᴸ         │ ᴺᵁᴸᴸ        │ ᴺᵁᴸᴸ      │ ᴺᵁᴸᴸ        │ ᴺᵁᴸᴸ     │ ᴺᵁᴸᴸ   │ ᴺᵁᴸᴸ │ set          │         │               │ YES     │ e          │
+│ tbl   │          0 │ blf_idx  │ ᴺᵁᴸᴸ         │ ᴺᵁᴸᴸ        │ ᴺᵁᴸᴸ      │ ᴺᵁᴸᴸ        │ ᴺᵁᴸᴸ     │ ᴺᵁᴸᴸ   │ ᴺᵁᴸᴸ │ BLOOM_FILTER │         │               │ YES     │ d, b       │
+│ tbl   │          0 │ mm1_idx  │ ᴺᵁᴸᴸ         │ ᴺᵁᴸᴸ        │ ᴺᵁᴸᴸ      │ ᴺᵁᴸᴸ        │ ᴺᵁᴸᴸ     │ ᴺᵁᴸᴸ   │ ᴺᵁᴸᴸ │ MINMAX       │         │               │ YES     │ a, c, d    │
+│ tbl   │          0 │ mm2_idx  │ ᴺᵁᴸᴸ         │ ᴺᵁᴸᴸ        │ ᴺᵁᴸᴸ      │ ᴺᵁᴸᴸ        │ ᴺᵁᴸᴸ     │ ᴺᵁᴸᴸ   │ ᴺᵁᴸᴸ │ MINMAX       │         │               │ YES     │ c, d, e    │
+│ tbl   │          0 │ PRIMARY  │ ᴺᵁᴸᴸ         │ ᴺᵁᴸᴸ        │ A         │ ᴺᵁᴸᴸ        │ ᴺᵁᴸᴸ     │ ᴺᵁᴸᴸ   │ ᴺᵁᴸᴸ │ PRIMARY      │         │               │ YES     │ c, a       │
+│ tbl   │          0 │ set_idx  │ ᴺᵁᴸᴸ         │ ᴺᵁᴸᴸ        │ ᴺᵁᴸᴸ      │ ᴺᵁᴸᴸ        │ ᴺᵁᴸᴸ     │ ᴺᵁᴸᴸ   │ ᴺᵁᴸᴸ │ SET          │         │               │ YES     │ e          │
 └───────┴────────────┴──────────┴──────────────┴─────────────┴───────────┴─────────────┴──────────┴────────┴──────┴──────────────┴─────────┴───────────────┴─────────┴────────────┘
 ```
 
diff --git a/src/Interpreters/InterpreterShowIndexesQuery.cpp b/src/Interpreters/InterpreterShowIndexesQuery.cpp
index 66a1b2941a3..fc31b6ef257 100644
--- a/src/Interpreters/InterpreterShowIndexesQuery.cpp
+++ b/src/Interpreters/InterpreterShowIndexesQuery.cpp
@@ -49,7 +49,7 @@ FROM (
             NULL AS sub_part,
             NULL AS packed,
             NULL AS null,
-            'primary' AS index_type,
+            'PRIMARY' AS index_type,
             '' AS comment,
             '' AS index_comment,
             'YES' AS visible,
@@ -70,7 +70,7 @@ FROM (
             NULL AS sub_part,
             NULL AS packed,
             NULL AS null,
-            type AS index_type,
+            upper(type) AS index_type,
             '' AS comment,
             '' AS index_comment,
             'YES' AS visible,
diff --git a/tests/queries/0_stateless/02724_show_indexes.reference b/tests/queries/0_stateless/02724_show_indexes.reference
index 8d3d37eab04..8872d74bbf2 100644
--- a/tests/queries/0_stateless/02724_show_indexes.reference
+++ b/tests/queries/0_stateless/02724_show_indexes.reference
@@ -1,47 +1,45 @@
 --- Aliases of SHOW INDEX
-tbl	0	blf_idx	\N	\N	\N	\N	\N	\N	\N	bloom_filter			YES	d, b
-tbl	0	mm1_idx	\N	\N	\N	\N	\N	\N	\N	minmax			YES	a, c, d
-tbl	0	mm2_idx	\N	\N	\N	\N	\N	\N	\N	minmax			YES	c, d, e
-tbl	0	PRIMARY	\N	\N	A	\N	\N	\N	\N	primary			YES	c, a
-tbl	0	set_idx	\N	\N	\N	\N	\N	\N	\N	set			YES	e
-tbl	0	blf_idx	\N	\N	\N	\N	\N	\N	\N	bloom_filter			YES	d, b
-tbl	0	mm1_idx	\N	\N	\N	\N	\N	\N	\N	minmax			YES	a, c, d
-tbl	0	mm2_idx	\N	\N	\N	\N	\N	\N	\N	minmax			YES	c, d, e
-tbl	0	PRIMARY	\N	\N	A	\N	\N	\N	\N	primary			YES	c, a
-tbl	0	set_idx	\N	\N	\N	\N	\N	\N	\N	set			YES	e
-tbl	0	blf_idx	\N	\N	\N	\N	\N	\N	\N	bloom_filter			YES	d, b
-tbl	0	mm1_idx	\N	\N	\N	\N	\N	\N	\N	minmax			YES	a, c, d
-tbl	0	mm2_idx	\N	\N	\N	\N	\N	\N	\N	minmax			YES	c, d, e
-tbl	0	PRIMARY	\N	\N	A	\N	\N	\N	\N	primary			YES	c, a
-tbl	0	set_idx	\N	\N	\N	\N	\N	\N	\N	set			YES	e
-tbl	0	blf_idx	\N	\N	\N	\N	\N	\N	\N	bloom_filter			YES	d, b
-tbl	0	mm1_idx	\N	\N	\N	\N	\N	\N	\N	minmax			YES	a, c, d
-tbl	0	mm2_idx	\N	\N	\N	\N	\N	\N	\N	minmax			YES	c, d, e
-tbl	0	PRIMARY	\N	\N	A	\N	\N	\N	\N	primary			YES	c, a
-tbl	0	set_idx	\N	\N	\N	\N	\N	\N	\N	set			YES	e
+tbl	0	blf_idx	\N	\N	\N	\N	\N	\N	\N	BLOOM_FILTER			YES	d, b
+tbl	0	mm1_idx	\N	\N	\N	\N	\N	\N	\N	MINMAX			YES	a, c, d
+tbl	0	mm2_idx	\N	\N	\N	\N	\N	\N	\N	MINMAX			YES	c, d, e
+tbl	0	PRIMARY	\N	\N	A	\N	\N	\N	\N	PRIMARY			YES	c, a
+tbl	0	set_idx	\N	\N	\N	\N	\N	\N	\N	SET			YES	e
+tbl	0	blf_idx	\N	\N	\N	\N	\N	\N	\N	BLOOM_FILTER			YES	d, b
+tbl	0	mm1_idx	\N	\N	\N	\N	\N	\N	\N	MINMAX			YES	a, c, d
+tbl	0	mm2_idx	\N	\N	\N	\N	\N	\N	\N	MINMAX			YES	c, d, e
+tbl	0	PRIMARY	\N	\N	A	\N	\N	\N	\N	PRIMARY			YES	c, a
+tbl	0	set_idx	\N	\N	\N	\N	\N	\N	\N	SET			YES	e
+tbl	0	blf_idx	\N	\N	\N	\N	\N	\N	\N	BLOOM_FILTER			YES	d, b
+tbl	0	mm1_idx	\N	\N	\N	\N	\N	\N	\N	MINMAX			YES	a, c, d
+tbl	0	mm2_idx	\N	\N	\N	\N	\N	\N	\N	MINMAX			YES	c, d, e
+tbl	0	PRIMARY	\N	\N	A	\N	\N	\N	\N	PRIMARY			YES	c, a
+tbl	0	set_idx	\N	\N	\N	\N	\N	\N	\N	SET			YES	e
+tbl	0	blf_idx	\N	\N	\N	\N	\N	\N	\N	BLOOM_FILTER			YES	d, b
+tbl	0	mm1_idx	\N	\N	\N	\N	\N	\N	\N	MINMAX			YES	a, c, d
+tbl	0	mm2_idx	\N	\N	\N	\N	\N	\N	\N	MINMAX			YES	c, d, e
+tbl	0	PRIMARY	\N	\N	A	\N	\N	\N	\N	PRIMARY			YES	c, a
+tbl	0	set_idx	\N	\N	\N	\N	\N	\N	\N	SET			YES	e
 --- EXTENDED
-tbl	0	blf_idx	\N	\N	\N	\N	\N	\N	\N	bloom_filter			YES	d, b
-tbl	0	mm1_idx	\N	\N	\N	\N	\N	\N	\N	minmax			YES	a, c, d
-tbl	0	mm2_idx	\N	\N	\N	\N	\N	\N	\N	minmax			YES	c, d, e
-tbl	0	PRIMARY	\N	\N	A	\N	\N	\N	\N	primary			YES	c, a
-tbl	0	set_idx	\N	\N	\N	\N	\N	\N	\N	set			YES	e
+tbl	0	blf_idx	\N	\N	\N	\N	\N	\N	\N	BLOOM_FILTER			YES	d, b
+tbl	0	mm1_idx	\N	\N	\N	\N	\N	\N	\N	MINMAX			YES	a, c, d
+tbl	0	mm2_idx	\N	\N	\N	\N	\N	\N	\N	MINMAX			YES	c, d, e
+tbl	0	PRIMARY	\N	\N	A	\N	\N	\N	\N	PRIMARY			YES	c, a
+tbl	0	set_idx	\N	\N	\N	\N	\N	\N	\N	SET			YES	e
 --- WHERE
-tbl	0	mm1_idx	\N	\N	\N	\N	\N	\N	\N	minmax			YES	a, c, d
-tbl	0	mm2_idx	\N	\N	\N	\N	\N	\N	\N	minmax			YES	c, d, e
 --- Check with weird table names
-$4@^7	0	PRIMARY	\N	\N	A	\N	\N	\N	\N	primary			YES	c
-NULL	0	PRIMARY	\N	\N	A	\N	\N	\N	\N	primary			YES	c
-\'	0	PRIMARY	\N	\N	A	\N	\N	\N	\N	primary			YES	c
-\'	0	PRIMARY	\N	\N	A	\N	\N	\N	\N	primary			YES	c
+$4@^7	0	PRIMARY	\N	\N	A	\N	\N	\N	\N	PRIMARY			YES	c
+NULL	0	PRIMARY	\N	\N	A	\N	\N	\N	\N	PRIMARY			YES	c
+\'	0	PRIMARY	\N	\N	A	\N	\N	\N	\N	PRIMARY			YES	c
+\'	0	PRIMARY	\N	\N	A	\N	\N	\N	\N	PRIMARY			YES	c
 --- Original table
-tbl	0	blf_idx	\N	\N	\N	\N	\N	\N	\N	bloom_filter			YES	d, b
-tbl	0	mm1_idx	\N	\N	\N	\N	\N	\N	\N	minmax			YES	a, c, d
-tbl	0	mm2_idx	\N	\N	\N	\N	\N	\N	\N	minmax			YES	c, d, e
-tbl	0	PRIMARY	\N	\N	A	\N	\N	\N	\N	primary			YES	c, a
-tbl	0	set_idx	\N	\N	\N	\N	\N	\N	\N	set			YES	e
+tbl	0	blf_idx	\N	\N	\N	\N	\N	\N	\N	BLOOM_FILTER			YES	d, b
+tbl	0	mm1_idx	\N	\N	\N	\N	\N	\N	\N	MINMAX			YES	a, c, d
+tbl	0	mm2_idx	\N	\N	\N	\N	\N	\N	\N	MINMAX			YES	c, d, e
+tbl	0	PRIMARY	\N	\N	A	\N	\N	\N	\N	PRIMARY			YES	c, a
+tbl	0	set_idx	\N	\N	\N	\N	\N	\N	\N	SET			YES	e
 --- Equally named table in other database
-tbl	0	mmi_idx	\N	\N	\N	\N	\N	\N	\N	minmax			YES	b
-tbl	0	PRIMARY	\N	\N	A	\N	\N	\N	\N	primary			YES	a
+tbl	0	mmi_idx	\N	\N	\N	\N	\N	\N	\N	MINMAX			YES	b
+tbl	0	PRIMARY	\N	\N	A	\N	\N	\N	\N	PRIMARY			YES	a
 --- Short form
-tbl	0	mmi_idx	\N	\N	\N	\N	\N	\N	\N	minmax			YES	b
-tbl	0	PRIMARY	\N	\N	A	\N	\N	\N	\N	primary			YES	a
+tbl	0	mmi_idx	\N	\N	\N	\N	\N	\N	\N	MINMAX			YES	b
+tbl	0	PRIMARY	\N	\N	A	\N	\N	\N	\N	PRIMARY			YES	a

From a8511a0be5bcbf78230f9489064fa6512030347c Mon Sep 17 00:00:00 2001
From: Robert Schulze <robert@clickhouse.com>
Date: Tue, 4 Jul 2023 16:30:18 +0000
Subject: [PATCH 034/242] Fix description of 'comment' field

---
 docs/en/sql-reference/statements/show.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/en/sql-reference/statements/show.md b/docs/en/sql-reference/statements/show.md
index c73782efbbf..cd691a6ff27 100644
--- a/docs/en/sql-reference/statements/show.md
+++ b/docs/en/sql-reference/statements/show.md
@@ -293,7 +293,7 @@ The statement produces a result table with the following structure:
 - packed - Currently unused
 - null - Currently unused
 - index_type - The index type, e.g. `PRIMARY`, `MINMAX`, `BLOOM_FILTER` etc. (String)
-- comment - `` additional information about the index, currently always `` (empty string) (String)
+- comment - Additional information about the index, currently always `` (empty string) (String)
 - index_comment - `` (empty string) because indexes in ClickHouse cannot have a `COMMENT` field like in MySQL (String)
 - visible - If the index is visible to the optimizer, always `YES` (String)
 - expression - The index expression (String)

From 6aab7577ff8af246d1d1f778dd41121b19a04fa8 Mon Sep 17 00:00:00 2001
From: Robert Schulze <robert@clickhouse.com>
Date: Tue, 4 Jul 2023 16:33:33 +0000
Subject: [PATCH 035/242] Document 'packed' field

---
 docs/en/sql-reference/statements/show.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/en/sql-reference/statements/show.md b/docs/en/sql-reference/statements/show.md
index cd691a6ff27..e86746585a7 100644
--- a/docs/en/sql-reference/statements/show.md
+++ b/docs/en/sql-reference/statements/show.md
@@ -290,7 +290,7 @@ The statement produces a result table with the following structure:
 - collation - The sorting of the column in the index, `A` if ascending, `D` if descending, `NULL` if unsorted (Nullable(String))
 - cardinality - Currently unused
 - sub_part - Currently unused
-- packed - Currently unused
+- packed - Always `NULL` because ClickHouse does not support packed (prefix-compressed) indexes like MySQL (Nullable(String))
 - null - Currently unused
 - index_type - The index type, e.g. `PRIMARY`, `MINMAX`, `BLOOM_FILTER` etc. (String)
 - comment - Additional information about the index, currently always `` (empty string) (String)

From eb86f274822154c49863dd6ad4a3952f74c3fdb2 Mon Sep 17 00:00:00 2001
From: Robert Schulze <robert@clickhouse.com>
Date: Tue, 4 Jul 2023 16:37:15 +0000
Subject: [PATCH 036/242] Document field 'sub_part'

---
 docs/en/sql-reference/statements/show.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/en/sql-reference/statements/show.md b/docs/en/sql-reference/statements/show.md
index e86746585a7..61cca8b4565 100644
--- a/docs/en/sql-reference/statements/show.md
+++ b/docs/en/sql-reference/statements/show.md
@@ -289,7 +289,7 @@ The statement produces a result table with the following structure:
 - column_name - Currently unused
 - collation - The sorting of the column in the index, `A` if ascending, `D` if descending, `NULL` if unsorted (Nullable(String))
 - cardinality - Currently unused
-- sub_part - Currently unused
+- sub_part - Always `NULL` because ClickHouse does not support index prefixes like MySQL (Nullable(String))
 - packed - Always `NULL` because ClickHouse does not support packed (prefix-compressed) indexes like MySQL (Nullable(String))
 - null - Currently unused
 - index_type - The index type, e.g. `PRIMARY`, `MINMAX`, `BLOOM_FILTER` etc. (String)

From 5c463838b71ee0cca0493796a9742bde90b1fc42 Mon Sep 17 00:00:00 2001
From: Robert Schulze <robert@clickhouse.com>
Date: Tue, 4 Jul 2023 16:44:34 +0000
Subject: [PATCH 037/242] Improve compatibility of 'cardinality' field

---
 docs/en/sql-reference/statements/show.md      | 12 +--
 .../InterpreterShowIndexesQuery.cpp           |  4 +-
 .../0_stateless/02724_show_indexes.reference  | 76 +++++++++----------
 3 files changed, 46 insertions(+), 46 deletions(-)

diff --git a/docs/en/sql-reference/statements/show.md b/docs/en/sql-reference/statements/show.md
index 61cca8b4565..e13f152c0e6 100644
--- a/docs/en/sql-reference/statements/show.md
+++ b/docs/en/sql-reference/statements/show.md
@@ -288,7 +288,7 @@ The statement produces a result table with the following structure:
 - seq_in_index - Currently unused
 - column_name - Currently unused
 - collation - The sorting of the column in the index, `A` if ascending, `D` if descending, `NULL` if unsorted (Nullable(String))
-- cardinality - Currently unused
+- cardinality - An estimation of the index cardinality (number of unique values in the index). Currently always 0. (UInt64)
 - sub_part - Always `NULL` because ClickHouse does not support index prefixes like MySQL (Nullable(String))
 - packed - Always `NULL` because ClickHouse does not support packed (prefix-compressed) indexes like MySQL (Nullable(String))
 - null - Currently unused
@@ -310,11 +310,11 @@ Result:
 
 ``` text
 ┌─table─┬─non_unique─┬─key_name─┬─seq_in_index─┬─column_name─┬─collation─┬─cardinality─┬─sub_part─┬─packed─┬─null─┬─index_type───┬─comment─┬─index_comment─┬─visible─┬─expression─┐
-│ tbl   │          0 │ blf_idx  │ ᴺᵁᴸᴸ         │ ᴺᵁᴸᴸ        │ ᴺᵁᴸᴸ      │ ᴺᵁᴸᴸ        │ ᴺᵁᴸᴸ     │ ᴺᵁᴸᴸ   │ ᴺᵁᴸᴸ │ BLOOM_FILTER │         │               │ YES     │ d, b       │
-│ tbl   │          0 │ mm1_idx  │ ᴺᵁᴸᴸ         │ ᴺᵁᴸᴸ        │ ᴺᵁᴸᴸ      │ ᴺᵁᴸᴸ        │ ᴺᵁᴸᴸ     │ ᴺᵁᴸᴸ   │ ᴺᵁᴸᴸ │ MINMAX       │         │               │ YES     │ a, c, d    │
-│ tbl   │          0 │ mm2_idx  │ ᴺᵁᴸᴸ         │ ᴺᵁᴸᴸ        │ ᴺᵁᴸᴸ      │ ᴺᵁᴸᴸ        │ ᴺᵁᴸᴸ     │ ᴺᵁᴸᴸ   │ ᴺᵁᴸᴸ │ MINMAX       │         │               │ YES     │ c, d, e    │
-│ tbl   │          0 │ PRIMARY  │ ᴺᵁᴸᴸ         │ ᴺᵁᴸᴸ        │ A         │ ᴺᵁᴸᴸ        │ ᴺᵁᴸᴸ     │ ᴺᵁᴸᴸ   │ ᴺᵁᴸᴸ │ PRIMARY      │         │               │ YES     │ c, a       │
-│ tbl   │          0 │ set_idx  │ ᴺᵁᴸᴸ         │ ᴺᵁᴸᴸ        │ ᴺᵁᴸᴸ      │ ᴺᵁᴸᴸ        │ ᴺᵁᴸᴸ     │ ᴺᵁᴸᴸ   │ ᴺᵁᴸᴸ │ SET          │         │               │ YES     │ e          │
+│ tbl   │          0 │ blf_idx  │ ᴺᵁᴸᴸ         │ ᴺᵁᴸᴸ        │ ᴺᵁᴸᴸ      │ 0           │ ᴺᵁᴸᴸ     │ ᴺᵁᴸᴸ   │ ᴺᵁᴸᴸ │ BLOOM_FILTER │         │               │ YES     │ d, b       │
+│ tbl   │          0 │ mm1_idx  │ ᴺᵁᴸᴸ         │ ᴺᵁᴸᴸ        │ ᴺᵁᴸᴸ      │ 0           │ ᴺᵁᴸᴸ     │ ᴺᵁᴸᴸ   │ ᴺᵁᴸᴸ │ MINMAX       │         │               │ YES     │ a, c, d    │
+│ tbl   │          0 │ mm2_idx  │ ᴺᵁᴸᴸ         │ ᴺᵁᴸᴸ        │ ᴺᵁᴸᴸ      │ 0           │ ᴺᵁᴸᴸ     │ ᴺᵁᴸᴸ   │ ᴺᵁᴸᴸ │ MINMAX       │         │               │ YES     │ c, d, e    │
+│ tbl   │          0 │ PRIMARY  │ ᴺᵁᴸᴸ         │ ᴺᵁᴸᴸ        │ A         │ 0           │ ᴺᵁᴸᴸ     │ ᴺᵁᴸᴸ   │ ᴺᵁᴸᴸ │ PRIMARY      │         │               │ YES     │ c, a       │
+│ tbl   │          0 │ set_idx  │ ᴺᵁᴸᴸ         │ ᴺᵁᴸᴸ        │ ᴺᵁᴸᴸ      │ 0           │ ᴺᵁᴸᴸ     │ ᴺᵁᴸᴸ   │ ᴺᵁᴸᴸ │ SET          │         │               │ YES     │ e          │
 └───────┴────────────┴──────────┴──────────────┴─────────────┴───────────┴─────────────┴──────────┴────────┴──────┴──────────────┴─────────┴───────────────┴─────────┴────────────┘
 ```
 
diff --git a/src/Interpreters/InterpreterShowIndexesQuery.cpp b/src/Interpreters/InterpreterShowIndexesQuery.cpp
index fc31b6ef257..d5b34e00791 100644
--- a/src/Interpreters/InterpreterShowIndexesQuery.cpp
+++ b/src/Interpreters/InterpreterShowIndexesQuery.cpp
@@ -45,7 +45,7 @@ FROM (
             NULL AS seq_in_index,
             NULL AS column_name,
             'A' AS collation,
-            NULL AS cardinality,
+            0 AS cardinality,
             NULL AS sub_part,
             NULL AS packed,
             NULL AS null,
@@ -66,7 +66,7 @@ FROM (
             NULL AS seq_in_index,
             NULL AS column_name,
             NULL AS collation,
-            NULL AS cardinality,
+            0 AS cardinality,
             NULL AS sub_part,
             NULL AS packed,
             NULL AS null,
diff --git a/tests/queries/0_stateless/02724_show_indexes.reference b/tests/queries/0_stateless/02724_show_indexes.reference
index 8872d74bbf2..f3019a031af 100644
--- a/tests/queries/0_stateless/02724_show_indexes.reference
+++ b/tests/queries/0_stateless/02724_show_indexes.reference
@@ -1,45 +1,45 @@
 --- Aliases of SHOW INDEX
-tbl	0	blf_idx	\N	\N	\N	\N	\N	\N	\N	BLOOM_FILTER			YES	d, b
-tbl	0	mm1_idx	\N	\N	\N	\N	\N	\N	\N	MINMAX			YES	a, c, d
-tbl	0	mm2_idx	\N	\N	\N	\N	\N	\N	\N	MINMAX			YES	c, d, e
-tbl	0	PRIMARY	\N	\N	A	\N	\N	\N	\N	PRIMARY			YES	c, a
-tbl	0	set_idx	\N	\N	\N	\N	\N	\N	\N	SET			YES	e
-tbl	0	blf_idx	\N	\N	\N	\N	\N	\N	\N	BLOOM_FILTER			YES	d, b
-tbl	0	mm1_idx	\N	\N	\N	\N	\N	\N	\N	MINMAX			YES	a, c, d
-tbl	0	mm2_idx	\N	\N	\N	\N	\N	\N	\N	MINMAX			YES	c, d, e
-tbl	0	PRIMARY	\N	\N	A	\N	\N	\N	\N	PRIMARY			YES	c, a
-tbl	0	set_idx	\N	\N	\N	\N	\N	\N	\N	SET			YES	e
-tbl	0	blf_idx	\N	\N	\N	\N	\N	\N	\N	BLOOM_FILTER			YES	d, b
-tbl	0	mm1_idx	\N	\N	\N	\N	\N	\N	\N	MINMAX			YES	a, c, d
-tbl	0	mm2_idx	\N	\N	\N	\N	\N	\N	\N	MINMAX			YES	c, d, e
-tbl	0	PRIMARY	\N	\N	A	\N	\N	\N	\N	PRIMARY			YES	c, a
-tbl	0	set_idx	\N	\N	\N	\N	\N	\N	\N	SET			YES	e
-tbl	0	blf_idx	\N	\N	\N	\N	\N	\N	\N	BLOOM_FILTER			YES	d, b
-tbl	0	mm1_idx	\N	\N	\N	\N	\N	\N	\N	MINMAX			YES	a, c, d
-tbl	0	mm2_idx	\N	\N	\N	\N	\N	\N	\N	MINMAX			YES	c, d, e
-tbl	0	PRIMARY	\N	\N	A	\N	\N	\N	\N	PRIMARY			YES	c, a
-tbl	0	set_idx	\N	\N	\N	\N	\N	\N	\N	SET			YES	e
+tbl	0	blf_idx	\N	\N	\N	0	\N	\N	\N	BLOOM_FILTER			YES	d, b
+tbl	0	mm1_idx	\N	\N	\N	0	\N	\N	\N	MINMAX			YES	a, c, d
+tbl	0	mm2_idx	\N	\N	\N	0	\N	\N	\N	MINMAX			YES	c, d, e
+tbl	0	PRIMARY	\N	\N	A	0	\N	\N	\N	PRIMARY			YES	c, a
+tbl	0	set_idx	\N	\N	\N	0	\N	\N	\N	SET			YES	e
+tbl	0	blf_idx	\N	\N	\N	0	\N	\N	\N	BLOOM_FILTER			YES	d, b
+tbl	0	mm1_idx	\N	\N	\N	0	\N	\N	\N	MINMAX			YES	a, c, d
+tbl	0	mm2_idx	\N	\N	\N	0	\N	\N	\N	MINMAX			YES	c, d, e
+tbl	0	PRIMARY	\N	\N	A	0	\N	\N	\N	PRIMARY			YES	c, a
+tbl	0	set_idx	\N	\N	\N	0	\N	\N	\N	SET			YES	e
+tbl	0	blf_idx	\N	\N	\N	0	\N	\N	\N	BLOOM_FILTER			YES	d, b
+tbl	0	mm1_idx	\N	\N	\N	0	\N	\N	\N	MINMAX			YES	a, c, d
+tbl	0	mm2_idx	\N	\N	\N	0	\N	\N	\N	MINMAX			YES	c, d, e
+tbl	0	PRIMARY	\N	\N	A	0	\N	\N	\N	PRIMARY			YES	c, a
+tbl	0	set_idx	\N	\N	\N	0	\N	\N	\N	SET			YES	e
+tbl	0	blf_idx	\N	\N	\N	0	\N	\N	\N	BLOOM_FILTER			YES	d, b
+tbl	0	mm1_idx	\N	\N	\N	0	\N	\N	\N	MINMAX			YES	a, c, d
+tbl	0	mm2_idx	\N	\N	\N	0	\N	\N	\N	MINMAX			YES	c, d, e
+tbl	0	PRIMARY	\N	\N	A	0	\N	\N	\N	PRIMARY			YES	c, a
+tbl	0	set_idx	\N	\N	\N	0	\N	\N	\N	SET			YES	e
 --- EXTENDED
-tbl	0	blf_idx	\N	\N	\N	\N	\N	\N	\N	BLOOM_FILTER			YES	d, b
-tbl	0	mm1_idx	\N	\N	\N	\N	\N	\N	\N	MINMAX			YES	a, c, d
-tbl	0	mm2_idx	\N	\N	\N	\N	\N	\N	\N	MINMAX			YES	c, d, e
-tbl	0	PRIMARY	\N	\N	A	\N	\N	\N	\N	PRIMARY			YES	c, a
-tbl	0	set_idx	\N	\N	\N	\N	\N	\N	\N	SET			YES	e
+tbl	0	blf_idx	\N	\N	\N	0	\N	\N	\N	BLOOM_FILTER			YES	d, b
+tbl	0	mm1_idx	\N	\N	\N	0	\N	\N	\N	MINMAX			YES	a, c, d
+tbl	0	mm2_idx	\N	\N	\N	0	\N	\N	\N	MINMAX			YES	c, d, e
+tbl	0	PRIMARY	\N	\N	A	0	\N	\N	\N	PRIMARY			YES	c, a
+tbl	0	set_idx	\N	\N	\N	0	\N	\N	\N	SET			YES	e
 --- WHERE
 --- Check with weird table names
-$4@^7	0	PRIMARY	\N	\N	A	\N	\N	\N	\N	PRIMARY			YES	c
-NULL	0	PRIMARY	\N	\N	A	\N	\N	\N	\N	PRIMARY			YES	c
-\'	0	PRIMARY	\N	\N	A	\N	\N	\N	\N	PRIMARY			YES	c
-\'	0	PRIMARY	\N	\N	A	\N	\N	\N	\N	PRIMARY			YES	c
+$4@^7	0	PRIMARY	\N	\N	A	0	\N	\N	\N	PRIMARY			YES	c
+NULL	0	PRIMARY	\N	\N	A	0	\N	\N	\N	PRIMARY			YES	c
+\'	0	PRIMARY	\N	\N	A	0	\N	\N	\N	PRIMARY			YES	c
+\'	0	PRIMARY	\N	\N	A	0	\N	\N	\N	PRIMARY			YES	c
 --- Original table
-tbl	0	blf_idx	\N	\N	\N	\N	\N	\N	\N	BLOOM_FILTER			YES	d, b
-tbl	0	mm1_idx	\N	\N	\N	\N	\N	\N	\N	MINMAX			YES	a, c, d
-tbl	0	mm2_idx	\N	\N	\N	\N	\N	\N	\N	MINMAX			YES	c, d, e
-tbl	0	PRIMARY	\N	\N	A	\N	\N	\N	\N	PRIMARY			YES	c, a
-tbl	0	set_idx	\N	\N	\N	\N	\N	\N	\N	SET			YES	e
+tbl	0	blf_idx	\N	\N	\N	0	\N	\N	\N	BLOOM_FILTER			YES	d, b
+tbl	0	mm1_idx	\N	\N	\N	0	\N	\N	\N	MINMAX			YES	a, c, d
+tbl	0	mm2_idx	\N	\N	\N	0	\N	\N	\N	MINMAX			YES	c, d, e
+tbl	0	PRIMARY	\N	\N	A	0	\N	\N	\N	PRIMARY			YES	c, a
+tbl	0	set_idx	\N	\N	\N	0	\N	\N	\N	SET			YES	e
 --- Equally named table in other database
-tbl	0	mmi_idx	\N	\N	\N	\N	\N	\N	\N	MINMAX			YES	b
-tbl	0	PRIMARY	\N	\N	A	\N	\N	\N	\N	PRIMARY			YES	a
+tbl	0	mmi_idx	\N	\N	\N	0	\N	\N	\N	MINMAX			YES	b
+tbl	0	PRIMARY	\N	\N	A	0	\N	\N	\N	PRIMARY			YES	a
 --- Short form
-tbl	0	mmi_idx	\N	\N	\N	\N	\N	\N	\N	MINMAX			YES	b
-tbl	0	PRIMARY	\N	\N	A	\N	\N	\N	\N	PRIMARY			YES	a
+tbl	0	mmi_idx	\N	\N	\N	0	\N	\N	\N	MINMAX			YES	b
+tbl	0	PRIMARY	\N	\N	A	0	\N	\N	\N	PRIMARY			YES	a

From b71043b2c9ee4d494a069b2b4f746334348c98a1 Mon Sep 17 00:00:00 2001
From: Robert Schulze <robert@clickhouse.com>
Date: Tue, 4 Jul 2023 16:57:31 +0000
Subject: [PATCH 038/242] Fix field 'non_unique'

---
 docs/en/sql-reference/statements/show.md      | 12 +--
 .../InterpreterShowIndexesQuery.cpp           |  4 +-
 .../0_stateless/02724_show_indexes.reference  | 76 +++++++++----------
 3 files changed, 46 insertions(+), 46 deletions(-)

diff --git a/docs/en/sql-reference/statements/show.md b/docs/en/sql-reference/statements/show.md
index e13f152c0e6..2c3c56ba95a 100644
--- a/docs/en/sql-reference/statements/show.md
+++ b/docs/en/sql-reference/statements/show.md
@@ -283,7 +283,7 @@ The optional keyword `EXTENDED` currently has no effect, it only exists for MySQ
 
 The statement produces a result table with the following structure:
 - table - The name of the table (String)
-- non_unique - 0 if the index cannot contain duplicates, 1 otherwise (UInt8)
+- non_unique - Always `1` as ClickHouse does not support uniqueness constraints. (UInt8)
 - key_name - The name of the index, `PRIMARY` if the index is a primary key index (String)
 - seq_in_index - Currently unused
 - column_name - Currently unused
@@ -310,11 +310,11 @@ Result:
 
 ``` text
 ┌─table─┬─non_unique─┬─key_name─┬─seq_in_index─┬─column_name─┬─collation─┬─cardinality─┬─sub_part─┬─packed─┬─null─┬─index_type───┬─comment─┬─index_comment─┬─visible─┬─expression─┐
-│ tbl   │          0 │ blf_idx  │ ᴺᵁᴸᴸ         │ ᴺᵁᴸᴸ        │ ᴺᵁᴸᴸ      │ 0           │ ᴺᵁᴸᴸ     │ ᴺᵁᴸᴸ   │ ᴺᵁᴸᴸ │ BLOOM_FILTER │         │               │ YES     │ d, b       │
-│ tbl   │          0 │ mm1_idx  │ ᴺᵁᴸᴸ         │ ᴺᵁᴸᴸ        │ ᴺᵁᴸᴸ      │ 0           │ ᴺᵁᴸᴸ     │ ᴺᵁᴸᴸ   │ ᴺᵁᴸᴸ │ MINMAX       │         │               │ YES     │ a, c, d    │
-│ tbl   │          0 │ mm2_idx  │ ᴺᵁᴸᴸ         │ ᴺᵁᴸᴸ        │ ᴺᵁᴸᴸ      │ 0           │ ᴺᵁᴸᴸ     │ ᴺᵁᴸᴸ   │ ᴺᵁᴸᴸ │ MINMAX       │         │               │ YES     │ c, d, e    │
-│ tbl   │          0 │ PRIMARY  │ ᴺᵁᴸᴸ         │ ᴺᵁᴸᴸ        │ A         │ 0           │ ᴺᵁᴸᴸ     │ ᴺᵁᴸᴸ   │ ᴺᵁᴸᴸ │ PRIMARY      │         │               │ YES     │ c, a       │
-│ tbl   │          0 │ set_idx  │ ᴺᵁᴸᴸ         │ ᴺᵁᴸᴸ        │ ᴺᵁᴸᴸ      │ 0           │ ᴺᵁᴸᴸ     │ ᴺᵁᴸᴸ   │ ᴺᵁᴸᴸ │ SET          │         │               │ YES     │ e          │
+│ tbl   │          1 │ blf_idx  │ ᴺᵁᴸᴸ         │ ᴺᵁᴸᴸ        │ ᴺᵁᴸᴸ      │ 0           │ ᴺᵁᴸᴸ     │ ᴺᵁᴸᴸ   │ ᴺᵁᴸᴸ │ BLOOM_FILTER │         │               │ YES     │ d, b       │
+│ tbl   │          1 │ mm1_idx  │ ᴺᵁᴸᴸ         │ ᴺᵁᴸᴸ        │ ᴺᵁᴸᴸ      │ 0           │ ᴺᵁᴸᴸ     │ ᴺᵁᴸᴸ   │ ᴺᵁᴸᴸ │ MINMAX       │         │               │ YES     │ a, c, d    │
+│ tbl   │          1 │ mm2_idx  │ ᴺᵁᴸᴸ         │ ᴺᵁᴸᴸ        │ ᴺᵁᴸᴸ      │ 0           │ ᴺᵁᴸᴸ     │ ᴺᵁᴸᴸ   │ ᴺᵁᴸᴸ │ MINMAX       │         │               │ YES     │ c, d, e    │
+│ tbl   │          1 │ PRIMARY  │ ᴺᵁᴸᴸ         │ ᴺᵁᴸᴸ        │ A         │ 0           │ ᴺᵁᴸᴸ     │ ᴺᵁᴸᴸ   │ ᴺᵁᴸᴸ │ PRIMARY      │         │               │ YES     │ c, a       │
+│ tbl   │          1 │ set_idx  │ ᴺᵁᴸᴸ         │ ᴺᵁᴸᴸ        │ ᴺᵁᴸᴸ      │ 0           │ ᴺᵁᴸᴸ     │ ᴺᵁᴸᴸ   │ ᴺᵁᴸᴸ │ SET          │         │               │ YES     │ e          │
 └───────┴────────────┴──────────┴──────────────┴─────────────┴───────────┴─────────────┴──────────┴────────┴──────┴──────────────┴─────────┴───────────────┴─────────┴────────────┘
 ```
 
diff --git a/src/Interpreters/InterpreterShowIndexesQuery.cpp b/src/Interpreters/InterpreterShowIndexesQuery.cpp
index d5b34e00791..3c001329ae3 100644
--- a/src/Interpreters/InterpreterShowIndexesQuery.cpp
+++ b/src/Interpreters/InterpreterShowIndexesQuery.cpp
@@ -40,7 +40,7 @@ SELECT *
 FROM (
         (SELECT
             name AS table,
-            0 AS non_unique,
+            1 AS non_unique,
             'PRIMARY' AS key_name,
             NULL AS seq_in_index,
             NULL AS column_name,
@@ -61,7 +61,7 @@ FROM (
     UNION ALL (
         SELECT
             table AS table,
-            0 AS non_unique,
+            1 AS non_unique,
             name AS key_name,
             NULL AS seq_in_index,
             NULL AS column_name,
diff --git a/tests/queries/0_stateless/02724_show_indexes.reference b/tests/queries/0_stateless/02724_show_indexes.reference
index f3019a031af..69cd405ec86 100644
--- a/tests/queries/0_stateless/02724_show_indexes.reference
+++ b/tests/queries/0_stateless/02724_show_indexes.reference
@@ -1,45 +1,45 @@
 --- Aliases of SHOW INDEX
-tbl	0	blf_idx	\N	\N	\N	0	\N	\N	\N	BLOOM_FILTER			YES	d, b
-tbl	0	mm1_idx	\N	\N	\N	0	\N	\N	\N	MINMAX			YES	a, c, d
-tbl	0	mm2_idx	\N	\N	\N	0	\N	\N	\N	MINMAX			YES	c, d, e
-tbl	0	PRIMARY	\N	\N	A	0	\N	\N	\N	PRIMARY			YES	c, a
-tbl	0	set_idx	\N	\N	\N	0	\N	\N	\N	SET			YES	e
-tbl	0	blf_idx	\N	\N	\N	0	\N	\N	\N	BLOOM_FILTER			YES	d, b
-tbl	0	mm1_idx	\N	\N	\N	0	\N	\N	\N	MINMAX			YES	a, c, d
-tbl	0	mm2_idx	\N	\N	\N	0	\N	\N	\N	MINMAX			YES	c, d, e
-tbl	0	PRIMARY	\N	\N	A	0	\N	\N	\N	PRIMARY			YES	c, a
-tbl	0	set_idx	\N	\N	\N	0	\N	\N	\N	SET			YES	e
-tbl	0	blf_idx	\N	\N	\N	0	\N	\N	\N	BLOOM_FILTER			YES	d, b
-tbl	0	mm1_idx	\N	\N	\N	0	\N	\N	\N	MINMAX			YES	a, c, d
-tbl	0	mm2_idx	\N	\N	\N	0	\N	\N	\N	MINMAX			YES	c, d, e
-tbl	0	PRIMARY	\N	\N	A	0	\N	\N	\N	PRIMARY			YES	c, a
-tbl	0	set_idx	\N	\N	\N	0	\N	\N	\N	SET			YES	e
-tbl	0	blf_idx	\N	\N	\N	0	\N	\N	\N	BLOOM_FILTER			YES	d, b
-tbl	0	mm1_idx	\N	\N	\N	0	\N	\N	\N	MINMAX			YES	a, c, d
-tbl	0	mm2_idx	\N	\N	\N	0	\N	\N	\N	MINMAX			YES	c, d, e
-tbl	0	PRIMARY	\N	\N	A	0	\N	\N	\N	PRIMARY			YES	c, a
-tbl	0	set_idx	\N	\N	\N	0	\N	\N	\N	SET			YES	e
+tbl	1	blf_idx	\N	\N	\N	0	\N	\N	\N	BLOOM_FILTER			YES	d, b
+tbl	1	mm1_idx	\N	\N	\N	0	\N	\N	\N	MINMAX			YES	a, c, d
+tbl	1	mm2_idx	\N	\N	\N	0	\N	\N	\N	MINMAX			YES	c, d, e
+tbl	1	PRIMARY	\N	\N	A	0	\N	\N	\N	PRIMARY			YES	c, a
+tbl	1	set_idx	\N	\N	\N	0	\N	\N	\N	SET			YES	e
+tbl	1	blf_idx	\N	\N	\N	0	\N	\N	\N	BLOOM_FILTER			YES	d, b
+tbl	1	mm1_idx	\N	\N	\N	0	\N	\N	\N	MINMAX			YES	a, c, d
+tbl	1	mm2_idx	\N	\N	\N	0	\N	\N	\N	MINMAX			YES	c, d, e
+tbl	1	PRIMARY	\N	\N	A	0	\N	\N	\N	PRIMARY			YES	c, a
+tbl	1	set_idx	\N	\N	\N	0	\N	\N	\N	SET			YES	e
+tbl	1	blf_idx	\N	\N	\N	0	\N	\N	\N	BLOOM_FILTER			YES	d, b
+tbl	1	mm1_idx	\N	\N	\N	0	\N	\N	\N	MINMAX			YES	a, c, d
+tbl	1	mm2_idx	\N	\N	\N	0	\N	\N	\N	MINMAX			YES	c, d, e
+tbl	1	PRIMARY	\N	\N	A	0	\N	\N	\N	PRIMARY			YES	c, a
+tbl	1	set_idx	\N	\N	\N	0	\N	\N	\N	SET			YES	e
+tbl	1	blf_idx	\N	\N	\N	0	\N	\N	\N	BLOOM_FILTER			YES	d, b
+tbl	1	mm1_idx	\N	\N	\N	0	\N	\N	\N	MINMAX			YES	a, c, d
+tbl	1	mm2_idx	\N	\N	\N	0	\N	\N	\N	MINMAX			YES	c, d, e
+tbl	1	PRIMARY	\N	\N	A	0	\N	\N	\N	PRIMARY			YES	c, a
+tbl	1	set_idx	\N	\N	\N	0	\N	\N	\N	SET			YES	e
 --- EXTENDED
-tbl	0	blf_idx	\N	\N	\N	0	\N	\N	\N	BLOOM_FILTER			YES	d, b
-tbl	0	mm1_idx	\N	\N	\N	0	\N	\N	\N	MINMAX			YES	a, c, d
-tbl	0	mm2_idx	\N	\N	\N	0	\N	\N	\N	MINMAX			YES	c, d, e
-tbl	0	PRIMARY	\N	\N	A	0	\N	\N	\N	PRIMARY			YES	c, a
-tbl	0	set_idx	\N	\N	\N	0	\N	\N	\N	SET			YES	e
+tbl	1	blf_idx	\N	\N	\N	0	\N	\N	\N	BLOOM_FILTER			YES	d, b
+tbl	1	mm1_idx	\N	\N	\N	0	\N	\N	\N	MINMAX			YES	a, c, d
+tbl	1	mm2_idx	\N	\N	\N	0	\N	\N	\N	MINMAX			YES	c, d, e
+tbl	1	PRIMARY	\N	\N	A	0	\N	\N	\N	PRIMARY			YES	c, a
+tbl	1	set_idx	\N	\N	\N	0	\N	\N	\N	SET			YES	e
 --- WHERE
 --- Check with weird table names
-$4@^7	0	PRIMARY	\N	\N	A	0	\N	\N	\N	PRIMARY			YES	c
-NULL	0	PRIMARY	\N	\N	A	0	\N	\N	\N	PRIMARY			YES	c
-\'	0	PRIMARY	\N	\N	A	0	\N	\N	\N	PRIMARY			YES	c
-\'	0	PRIMARY	\N	\N	A	0	\N	\N	\N	PRIMARY			YES	c
+$4@^7	1	PRIMARY	\N	\N	A	0	\N	\N	\N	PRIMARY			YES	c
+NULL	1	PRIMARY	\N	\N	A	0	\N	\N	\N	PRIMARY			YES	c
+\'	1	PRIMARY	\N	\N	A	0	\N	\N	\N	PRIMARY			YES	c
+\'	1	PRIMARY	\N	\N	A	0	\N	\N	\N	PRIMARY			YES	c
 --- Original table
-tbl	0	blf_idx	\N	\N	\N	0	\N	\N	\N	BLOOM_FILTER			YES	d, b
-tbl	0	mm1_idx	\N	\N	\N	0	\N	\N	\N	MINMAX			YES	a, c, d
-tbl	0	mm2_idx	\N	\N	\N	0	\N	\N	\N	MINMAX			YES	c, d, e
-tbl	0	PRIMARY	\N	\N	A	0	\N	\N	\N	PRIMARY			YES	c, a
-tbl	0	set_idx	\N	\N	\N	0	\N	\N	\N	SET			YES	e
+tbl	1	blf_idx	\N	\N	\N	0	\N	\N	\N	BLOOM_FILTER			YES	d, b
+tbl	1	mm1_idx	\N	\N	\N	0	\N	\N	\N	MINMAX			YES	a, c, d
+tbl	1	mm2_idx	\N	\N	\N	0	\N	\N	\N	MINMAX			YES	c, d, e
+tbl	1	PRIMARY	\N	\N	A	0	\N	\N	\N	PRIMARY			YES	c, a
+tbl	1	set_idx	\N	\N	\N	0	\N	\N	\N	SET			YES	e
 --- Equally named table in other database
-tbl	0	mmi_idx	\N	\N	\N	0	\N	\N	\N	MINMAX			YES	b
-tbl	0	PRIMARY	\N	\N	A	0	\N	\N	\N	PRIMARY			YES	a
+tbl	1	mmi_idx	\N	\N	\N	0	\N	\N	\N	MINMAX			YES	b
+tbl	1	PRIMARY	\N	\N	A	0	\N	\N	\N	PRIMARY			YES	a
 --- Short form
-tbl	0	mmi_idx	\N	\N	\N	0	\N	\N	\N	MINMAX			YES	b
-tbl	0	PRIMARY	\N	\N	A	0	\N	\N	\N	PRIMARY			YES	a
+tbl	1	mmi_idx	\N	\N	\N	0	\N	\N	\N	MINMAX			YES	b
+tbl	1	PRIMARY	\N	\N	A	0	\N	\N	\N	PRIMARY			YES	a

From 0f68c894f54900aa323e4345bbc8c55eefd2040f Mon Sep 17 00:00:00 2001
From: Robert Schulze <robert@clickhouse.com>
Date: Tue, 4 Jul 2023 17:02:00 +0000
Subject: [PATCH 039/242] Point to existing system tables for alternatives

---
 docs/en/sql-reference/statements/show.md | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/docs/en/sql-reference/statements/show.md b/docs/en/sql-reference/statements/show.md
index 2c3c56ba95a..d30ded6f3dc 100644
--- a/docs/en/sql-reference/statements/show.md
+++ b/docs/en/sql-reference/statements/show.md
@@ -272,6 +272,10 @@ SHOW DICTIONARIES FROM db LIKE '%reg%' LIMIT 2
 
 Displays a list of primary and data skipping indexes of a table.
 
+This statement mostly exists for compatibility with MySQL. System tables [system.tables](../../operations/system-tables/tables.md) (for
+primary keys) and [system.data_skipping_indices](../../operations/system-tables/data_skipping_indices.md) (for data skipping indices)
+provide equivalent information but in a fashion more native to ClickHouse.
+
 ```sql
 SHOW [EXTENDED] {INDEX | INDEXES | INDICES | KEYS } {FROM | IN} <table> [{FROM | IN} <db>] [WHERE <expr>] [INTO OUTFILE <filename>] [FORMAT <format>]
 ```

From 289d9849d408d9bd38e95d89b6434a8d6bf57664 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@clickhouse.com>
Date: Thu, 22 Jun 2023 22:50:09 +0200
Subject: [PATCH 040/242] Allow SQL standard FETCH without OFFSET

---
 src/Parsers/ParserSelectQuery.cpp             | 94 ++++++++++---------
 .../02790_sql_standard_fetch.reference        | 36 +++++++
 .../0_stateless/02790_sql_standard_fetch.sql  | 31 ++++++
 3 files changed, 119 insertions(+), 42 deletions(-)
 create mode 100644 tests/queries/0_stateless/02790_sql_standard_fetch.reference
 create mode 100644 tests/queries/0_stateless/02790_sql_standard_fetch.sql

diff --git a/src/Parsers/ParserSelectQuery.cpp b/src/Parsers/ParserSelectQuery.cpp
index 1c48f773823..341c1ef60b4 100644
--- a/src/Parsers/ParserSelectQuery.cpp
+++ b/src/Parsers/ParserSelectQuery.cpp
@@ -292,6 +292,9 @@ bool ParserSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
     /// This is needed for TOP expression, because it can also use WITH TIES.
     bool limit_with_ties_occured = false;
 
+    bool has_offset_clause = false;
+    bool offset_clause_has_sql_standard_row_or_rows = false; /// OFFSET offset_row_count {ROW | ROWS}
+
     /// LIMIT length | LIMIT offset, length | LIMIT count BY expr-list | LIMIT offset, length BY expr-list
     if (s_limit.ignore(pos, expected))
     {
@@ -316,6 +319,8 @@ bool ParserSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
         {
             if (!exp_elem.parse(pos, limit_offset, expected))
                 return false;
+
+            has_offset_clause = true;
         }
         else if (s_with_ties.ignore(pos, expected))
         {
@@ -351,60 +356,65 @@ bool ParserSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
     }
     else if (s_offset.ignore(pos, expected))
     {
-        /// OFFSET offset_row_count {ROW | ROWS} FETCH {FIRST | NEXT} fetch_row_count {ROW | ROWS} {ONLY | WITH TIES}
-        bool offset_with_fetch_maybe = false;
+        /// OFFSET without LIMIT
+
+        has_offset_clause = true;
 
         if (!exp_elem.parse(pos, limit_offset, expected))
             return false;
 
+        /// SQL standard OFFSET N ROW[S] ...
+
+        if (s_row.ignore(pos, expected))
+            offset_clause_has_sql_standard_row_or_rows = true;
+
+        if (s_rows.ignore(pos, expected))
+        {
+            if (offset_clause_has_sql_standard_row_or_rows)
+                throw Exception(ErrorCodes::ROW_AND_ROWS_TOGETHER, "Can not use ROW and ROWS together");
+
+            offset_clause_has_sql_standard_row_or_rows = true;
+        }
+    }
+
+    /// SQL standard FETCH (either following SQL standard OFFSET or following ORDER BY)
+    if ((!has_offset_clause || offset_clause_has_sql_standard_row_or_rows)
+        && s_fetch.ignore(pos, expected))
+    {
+        /// FETCH clause must exist with "ORDER BY"
+        if (!order_expression_list)
+            throw Exception(ErrorCodes::OFFSET_FETCH_WITHOUT_ORDER_BY, "Can not use OFFSET FETCH clause without ORDER BY");
+
+        if (s_first.ignore(pos, expected))
+        {
+            if (s_next.ignore(pos, expected))
+                throw Exception(ErrorCodes::FIRST_AND_NEXT_TOGETHER, "Can not use FIRST and NEXT together");
+        }
+        else if (!s_next.ignore(pos, expected))
+            return false;
+
+        if (!exp_elem.parse(pos, limit_length, expected))
+            return false;
+
         if (s_row.ignore(pos, expected))
         {
             if (s_rows.ignore(pos, expected))
                 throw Exception(ErrorCodes::ROW_AND_ROWS_TOGETHER, "Can not use ROW and ROWS together");
-            offset_with_fetch_maybe = true;
         }
-        else if (s_rows.ignore(pos, expected))
+        else if (!s_rows.ignore(pos, expected))
+            return false;
+
+        if (s_with_ties.ignore(pos, expected))
         {
-            offset_with_fetch_maybe = true;
+            select_query->limit_with_ties = true;
         }
-
-        if (offset_with_fetch_maybe && s_fetch.ignore(pos, expected))
+        else if (s_only.ignore(pos, expected))
         {
-            /// OFFSET FETCH clause must exists with "ORDER BY"
-            if (!order_expression_list)
-                throw Exception(ErrorCodes::OFFSET_FETCH_WITHOUT_ORDER_BY, "Can not use OFFSET FETCH clause without ORDER BY");
-
-            if (s_first.ignore(pos, expected))
-            {
-                if (s_next.ignore(pos, expected))
-                    throw Exception(ErrorCodes::FIRST_AND_NEXT_TOGETHER, "Can not use FIRST and NEXT together");
-            }
-            else if (!s_next.ignore(pos, expected))
-                return false;
-
-            if (!exp_elem.parse(pos, limit_length, expected))
-                return false;
-
-            if (s_row.ignore(pos, expected))
-            {
-                if (s_rows.ignore(pos, expected))
-                    throw Exception(ErrorCodes::ROW_AND_ROWS_TOGETHER, "Can not use ROW and ROWS together");
-            }
-            else if (!s_rows.ignore(pos, expected))
-                return false;
-
-            if (s_with_ties.ignore(pos, expected))
-            {
-                select_query->limit_with_ties = true;
-            }
-            else if (s_only.ignore(pos, expected))
-            {
-                select_query->limit_with_ties = false;
-            }
-            else
-            {
-                return false;
-            }
+            select_query->limit_with_ties = false;
+        }
+        else
+        {
+            return false;
         }
     }
 
diff --git a/tests/queries/0_stateless/02790_sql_standard_fetch.reference b/tests/queries/0_stateless/02790_sql_standard_fetch.reference
new file mode 100644
index 00000000000..429eecbc936
--- /dev/null
+++ b/tests/queries/0_stateless/02790_sql_standard_fetch.reference
@@ -0,0 +1,36 @@
+┌─id─┬─name──┬─department─┬─salary─┐
+│ 25 │ Frank │ it         │    120 │
+│ 23 │ Henry │ it         │    104 │
+│ 24 │ Irene │ it         │    104 │
+│ 33 │ Alice │ sales      │    100 │
+│ 32 │ Dave  │ sales      │     96 │
+└────┴───────┴────────────┴────────┘
+┌─id─┬─name──┬─department─┬─salary─┐
+│ 25 │ Frank │ it         │    120 │
+│ 23 │ Henry │ it         │    104 │
+│ 24 │ Irene │ it         │    104 │
+│ 33 │ Alice │ sales      │    100 │
+│ 32 │ Dave  │ sales      │     96 │
+└────┴───────┴────────────┴────────┘
+┌─id─┬─name──┬─department─┬─salary─┐
+│ 25 │ Frank │ it         │    120 │
+│ 23 │ Henry │ it         │    104 │
+│ 24 │ Irene │ it         │    104 │
+│ 33 │ Alice │ sales      │    100 │
+│ 31 │ Cindy │ sales      │     96 │
+│ 32 │ Dave  │ sales      │     96 │
+└────┴───────┴────────────┴────────┘
+┌─id─┬─name──┬─department─┬─salary─┐
+│ 33 │ Alice │ sales      │    100 │
+│ 31 │ Cindy │ sales      │     96 │
+│ 32 │ Dave  │ sales      │     96 │
+│ 22 │ Grace │ it         │     90 │
+│ 21 │ Emma  │ it         │     84 │
+└────┴───────┴────────────┴────────┘
+┌─id─┬─name──┬─department─┬─salary─┐
+│ 33 │ Alice │ sales      │    100 │
+│ 31 │ Cindy │ sales      │     96 │
+│ 32 │ Dave  │ sales      │     96 │
+│ 22 │ Grace │ it         │     90 │
+│ 21 │ Emma  │ it         │     84 │
+└────┴───────┴────────────┴────────┘
diff --git a/tests/queries/0_stateless/02790_sql_standard_fetch.sql b/tests/queries/0_stateless/02790_sql_standard_fetch.sql
new file mode 100644
index 00000000000..58ffa035d47
--- /dev/null
+++ b/tests/queries/0_stateless/02790_sql_standard_fetch.sql
@@ -0,0 +1,31 @@
+# https://antonz.org/sql-fetch/
+
+CREATE TEMPORARY TABLE employees (id UInt64, name String, department String, salary UInt64);
+INSERT INTO employees VALUES (23, 'Henry', 'it', 104), (24, 'Irene', 'it', 104), (25, 'Frank', 'it', 120), (31, 'Cindy', 'sales', 96), (33, 'Alice', 'sales', 100), (32, 'Dave', 'sales', 96), (22, 'Grace', 'it', 90), (21, 'Emma', 'it', '84');
+
+select * from employees
+order by salary desc
+limit 5
+format PrettyCompactNoEscapes;
+
+select * from employees
+order by salary desc
+fetch first 5 rows only
+format PrettyCompactNoEscapes;
+
+select * from employees
+order by salary desc
+fetch first 5 rows with ties
+format PrettyCompactNoEscapes;
+
+select * from employees
+order by salary desc
+offset 3 rows
+fetch next 5 rows only
+format PrettyCompactNoEscapes;
+
+select * from employees
+order by salary desc
+offset 3 rows
+fetch first 5 rows only
+format PrettyCompactNoEscapes;

From caa75a7fc38ab95a405488b3826022683160eed9 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@clickhouse.com>
Date: Tue, 4 Jul 2023 20:59:28 +0200
Subject: [PATCH 041/242] Make the test stable

---
 .../0_stateless/02790_sql_standard_fetch.sql      | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/tests/queries/0_stateless/02790_sql_standard_fetch.sql b/tests/queries/0_stateless/02790_sql_standard_fetch.sql
index 58ffa035d47..4204279a746 100644
--- a/tests/queries/0_stateless/02790_sql_standard_fetch.sql
+++ b/tests/queries/0_stateless/02790_sql_standard_fetch.sql
@@ -1,30 +1,33 @@
-# https://antonz.org/sql-fetch/
+-- https://antonz.org/sql-fetch/
 
 CREATE TEMPORARY TABLE employees (id UInt64, name String, department String, salary UInt64);
 INSERT INTO employees VALUES (23, 'Henry', 'it', 104), (24, 'Irene', 'it', 104), (25, 'Frank', 'it', 120), (31, 'Cindy', 'sales', 96), (33, 'Alice', 'sales', 100), (32, 'Dave', 'sales', 96), (22, 'Grace', 'it', 90), (21, 'Emma', 'it', '84');
 
-select * from employees
+-- Determinism
+SET max_threads = 1, parallelize_output_from_storages = 0;
+
+select * from (SELECT * FROM employees ORDER BY id, name, department, salary)
 order by salary desc
 limit 5
 format PrettyCompactNoEscapes;
 
-select * from employees
+select * from (SELECT * FROM employees ORDER BY id, name, department, salary)
 order by salary desc
 fetch first 5 rows only
 format PrettyCompactNoEscapes;
 
-select * from employees
+select * from (SELECT * FROM employees ORDER BY id, name, department, salary)
 order by salary desc
 fetch first 5 rows with ties
 format PrettyCompactNoEscapes;
 
-select * from employees
+select * from (SELECT * FROM employees ORDER BY id, name, department, salary)
 order by salary desc
 offset 3 rows
 fetch next 5 rows only
 format PrettyCompactNoEscapes;
 
-select * from employees
+select * from (SELECT * FROM employees ORDER BY id, name, department, salary)
 order by salary desc
 offset 3 rows
 fetch first 5 rows only

From 966e93b9084541c311ddb482c7e767413ceb359f Mon Sep 17 00:00:00 2001
From: Robert Schulze <robert@clickhouse.com>
Date: Tue, 4 Jul 2023 17:02:00 +0000
Subject: [PATCH 042/242] Point to existing system tables for alternatives

---
 docs/en/sql-reference/statements/show.md      | 32 ++++----
 .../InterpreterShowIndexesQuery.cpp           |  8 +-
 .../0_stateless/02724_show_indexes.reference  | 76 +++++++++----------
 3 files changed, 58 insertions(+), 58 deletions(-)

diff --git a/docs/en/sql-reference/statements/show.md b/docs/en/sql-reference/statements/show.md
index d30ded6f3dc..b5bacef7b1f 100644
--- a/docs/en/sql-reference/statements/show.md
+++ b/docs/en/sql-reference/statements/show.md
@@ -286,21 +286,21 @@ equivalent. If no database is specified, the query assumes the current database
 The optional keyword `EXTENDED` currently has no effect, it only exists for MySQL compatibility.
 
 The statement produces a result table with the following structure:
-- table - The name of the table (String)
+- table - The name of the table. (String)
 - non_unique - Always `1` as ClickHouse does not support uniqueness constraints. (UInt8)
-- key_name - The name of the index, `PRIMARY` if the index is a primary key index (String)
-- seq_in_index - Currently unused
-- column_name - Currently unused
-- collation - The sorting of the column in the index, `A` if ascending, `D` if descending, `NULL` if unsorted (Nullable(String))
+- key_name - The name of the index, `PRIMARY` if the index is a primary key index. (String)
+- seq_in_index - Currently always `1`. (In MySQL, this field denotes the position of the column in a non-functional index.) (UInt8)
+- column_name - Currently always `` (empty string), also see field `expression`. (In MySQL, this field denotes the name of the column in a non-functional index.) (String)
+- collation - The sorting of the column in the index: `A` if ascending, `D` if descending, `NULL` if unsorted. (Nullable(String))
 - cardinality - An estimation of the index cardinality (number of unique values in the index). Currently always 0. (UInt64)
-- sub_part - Always `NULL` because ClickHouse does not support index prefixes like MySQL (Nullable(String))
-- packed - Always `NULL` because ClickHouse does not support packed (prefix-compressed) indexes like MySQL (Nullable(String))
+- sub_part - Always `NULL` because ClickHouse does not support index prefixes like MySQL. (Nullable(String))
+- packed - Always `NULL` because ClickHouse does not support packed indexes (like MySQL). (Nullable(String))
 - null - Currently unused
 - index_type - The index type, e.g. `PRIMARY`, `MINMAX`, `BLOOM_FILTER` etc. (String)
-- comment - Additional information about the index, currently always `` (empty string) (String)
-- index_comment - `` (empty string) because indexes in ClickHouse cannot have a `COMMENT` field like in MySQL (String)
-- visible - If the index is visible to the optimizer, always `YES` (String)
-- expression - The index expression (String)
+- comment - Additional information about the index, currently always `` (empty string). (String)
+- index_comment - `` (empty string) because indexes in ClickHouse cannot have a `COMMENT` field (like in MySQL). (String)
+- visible - If the index is visible to the optimizer, always `YES`. (String)
+- expression - The index expression. (In MySQL this field is only used for functional-indexes.) (String)
 
 **Examples**
 
@@ -314,11 +314,11 @@ Result:
 
 ``` text
 ┌─table─┬─non_unique─┬─key_name─┬─seq_in_index─┬─column_name─┬─collation─┬─cardinality─┬─sub_part─┬─packed─┬─null─┬─index_type───┬─comment─┬─index_comment─┬─visible─┬─expression─┐
-│ tbl   │          1 │ blf_idx  │ ᴺᵁᴸᴸ         │ ᴺᵁᴸᴸ        │ ᴺᵁᴸᴸ      │ 0           │ ᴺᵁᴸᴸ     │ ᴺᵁᴸᴸ   │ ᴺᵁᴸᴸ │ BLOOM_FILTER │         │               │ YES     │ d, b       │
-│ tbl   │          1 │ mm1_idx  │ ᴺᵁᴸᴸ         │ ᴺᵁᴸᴸ        │ ᴺᵁᴸᴸ      │ 0           │ ᴺᵁᴸᴸ     │ ᴺᵁᴸᴸ   │ ᴺᵁᴸᴸ │ MINMAX       │         │               │ YES     │ a, c, d    │
-│ tbl   │          1 │ mm2_idx  │ ᴺᵁᴸᴸ         │ ᴺᵁᴸᴸ        │ ᴺᵁᴸᴸ      │ 0           │ ᴺᵁᴸᴸ     │ ᴺᵁᴸᴸ   │ ᴺᵁᴸᴸ │ MINMAX       │         │               │ YES     │ c, d, e    │
-│ tbl   │          1 │ PRIMARY  │ ᴺᵁᴸᴸ         │ ᴺᵁᴸᴸ        │ A         │ 0           │ ᴺᵁᴸᴸ     │ ᴺᵁᴸᴸ   │ ᴺᵁᴸᴸ │ PRIMARY      │         │               │ YES     │ c, a       │
-│ tbl   │          1 │ set_idx  │ ᴺᵁᴸᴸ         │ ᴺᵁᴸᴸ        │ ᴺᵁᴸᴸ      │ 0           │ ᴺᵁᴸᴸ     │ ᴺᵁᴸᴸ   │ ᴺᵁᴸᴸ │ SET          │         │               │ YES     │ e          │
+│ tbl   │          1 │ blf_idx  │ 1            │             │ ᴺᵁᴸᴸ      │ 0           │ ᴺᵁᴸᴸ     │ ᴺᵁᴸᴸ   │ ᴺᵁᴸᴸ │ BLOOM_FILTER │         │               │ YES     │ d, b       │
+│ tbl   │          1 │ mm1_idx  │ 1            │             │ ᴺᵁᴸᴸ      │ 0           │ ᴺᵁᴸᴸ     │ ᴺᵁᴸᴸ   │ ᴺᵁᴸᴸ │ MINMAX       │         │               │ YES     │ a, c, d    │
+│ tbl   │          1 │ mm2_idx  │ 1            │             │ ᴺᵁᴸᴸ      │ 0           │ ᴺᵁᴸᴸ     │ ᴺᵁᴸᴸ   │ ᴺᵁᴸᴸ │ MINMAX       │         │               │ YES     │ c, d, e    │
+│ tbl   │          1 │ PRIMARY  │ 1            │             │ A         │ 0           │ ᴺᵁᴸᴸ     │ ᴺᵁᴸᴸ   │ ᴺᵁᴸᴸ │ PRIMARY      │         │               │ YES     │ c, a       │
+│ tbl   │          1 │ set_idx  │ 1            │             │ ᴺᵁᴸᴸ      │ 0           │ ᴺᵁᴸᴸ     │ ᴺᵁᴸᴸ   │ ᴺᵁᴸᴸ │ SET          │         │               │ YES     │ e          │
 └───────┴────────────┴──────────┴──────────────┴─────────────┴───────────┴─────────────┴──────────┴────────┴──────┴──────────────┴─────────┴───────────────┴─────────┴────────────┘
 ```
 
diff --git a/src/Interpreters/InterpreterShowIndexesQuery.cpp b/src/Interpreters/InterpreterShowIndexesQuery.cpp
index 3c001329ae3..5aafc22389f 100644
--- a/src/Interpreters/InterpreterShowIndexesQuery.cpp
+++ b/src/Interpreters/InterpreterShowIndexesQuery.cpp
@@ -42,8 +42,8 @@ FROM (
             name AS table,
             1 AS non_unique,
             'PRIMARY' AS key_name,
-            NULL AS seq_in_index,
-            NULL AS column_name,
+            1 AS seq_in_index,
+            '' AS column_name,
             'A' AS collation,
             0 AS cardinality,
             NULL AS sub_part,
@@ -63,8 +63,8 @@ FROM (
             table AS table,
             1 AS non_unique,
             name AS key_name,
-            NULL AS seq_in_index,
-            NULL AS column_name,
+            1 AS seq_in_index,
+            '' AS column_name,
             NULL AS collation,
             0 AS cardinality,
             NULL AS sub_part,
diff --git a/tests/queries/0_stateless/02724_show_indexes.reference b/tests/queries/0_stateless/02724_show_indexes.reference
index 69cd405ec86..c5b7883e17e 100644
--- a/tests/queries/0_stateless/02724_show_indexes.reference
+++ b/tests/queries/0_stateless/02724_show_indexes.reference
@@ -1,45 +1,45 @@
 --- Aliases of SHOW INDEX
-tbl	1	blf_idx	\N	\N	\N	0	\N	\N	\N	BLOOM_FILTER			YES	d, b
-tbl	1	mm1_idx	\N	\N	\N	0	\N	\N	\N	MINMAX			YES	a, c, d
-tbl	1	mm2_idx	\N	\N	\N	0	\N	\N	\N	MINMAX			YES	c, d, e
-tbl	1	PRIMARY	\N	\N	A	0	\N	\N	\N	PRIMARY			YES	c, a
-tbl	1	set_idx	\N	\N	\N	0	\N	\N	\N	SET			YES	e
-tbl	1	blf_idx	\N	\N	\N	0	\N	\N	\N	BLOOM_FILTER			YES	d, b
-tbl	1	mm1_idx	\N	\N	\N	0	\N	\N	\N	MINMAX			YES	a, c, d
-tbl	1	mm2_idx	\N	\N	\N	0	\N	\N	\N	MINMAX			YES	c, d, e
-tbl	1	PRIMARY	\N	\N	A	0	\N	\N	\N	PRIMARY			YES	c, a
-tbl	1	set_idx	\N	\N	\N	0	\N	\N	\N	SET			YES	e
-tbl	1	blf_idx	\N	\N	\N	0	\N	\N	\N	BLOOM_FILTER			YES	d, b
-tbl	1	mm1_idx	\N	\N	\N	0	\N	\N	\N	MINMAX			YES	a, c, d
-tbl	1	mm2_idx	\N	\N	\N	0	\N	\N	\N	MINMAX			YES	c, d, e
-tbl	1	PRIMARY	\N	\N	A	0	\N	\N	\N	PRIMARY			YES	c, a
-tbl	1	set_idx	\N	\N	\N	0	\N	\N	\N	SET			YES	e
-tbl	1	blf_idx	\N	\N	\N	0	\N	\N	\N	BLOOM_FILTER			YES	d, b
-tbl	1	mm1_idx	\N	\N	\N	0	\N	\N	\N	MINMAX			YES	a, c, d
-tbl	1	mm2_idx	\N	\N	\N	0	\N	\N	\N	MINMAX			YES	c, d, e
-tbl	1	PRIMARY	\N	\N	A	0	\N	\N	\N	PRIMARY			YES	c, a
-tbl	1	set_idx	\N	\N	\N	0	\N	\N	\N	SET			YES	e
+tbl	1	blf_idx	1		\N	0	\N	\N	\N	BLOOM_FILTER			YES	d, b
+tbl	1	mm1_idx	1		\N	0	\N	\N	\N	MINMAX			YES	a, c, d
+tbl	1	mm2_idx	1		\N	0	\N	\N	\N	MINMAX			YES	c, d, e
+tbl	1	PRIMARY	1		A	0	\N	\N	\N	PRIMARY			YES	c, a
+tbl	1	set_idx	1		\N	0	\N	\N	\N	SET			YES	e
+tbl	1	blf_idx	1		\N	0	\N	\N	\N	BLOOM_FILTER			YES	d, b
+tbl	1	mm1_idx	1		\N	0	\N	\N	\N	MINMAX			YES	a, c, d
+tbl	1	mm2_idx	1		\N	0	\N	\N	\N	MINMAX			YES	c, d, e
+tbl	1	PRIMARY	1		A	0	\N	\N	\N	PRIMARY			YES	c, a
+tbl	1	set_idx	1		\N	0	\N	\N	\N	SET			YES	e
+tbl	1	blf_idx	1		\N	0	\N	\N	\N	BLOOM_FILTER			YES	d, b
+tbl	1	mm1_idx	1		\N	0	\N	\N	\N	MINMAX			YES	a, c, d
+tbl	1	mm2_idx	1		\N	0	\N	\N	\N	MINMAX			YES	c, d, e
+tbl	1	PRIMARY	1		A	0	\N	\N	\N	PRIMARY			YES	c, a
+tbl	1	set_idx	1		\N	0	\N	\N	\N	SET			YES	e
+tbl	1	blf_idx	1		\N	0	\N	\N	\N	BLOOM_FILTER			YES	d, b
+tbl	1	mm1_idx	1		\N	0	\N	\N	\N	MINMAX			YES	a, c, d
+tbl	1	mm2_idx	1		\N	0	\N	\N	\N	MINMAX			YES	c, d, e
+tbl	1	PRIMARY	1		A	0	\N	\N	\N	PRIMARY			YES	c, a
+tbl	1	set_idx	1		\N	0	\N	\N	\N	SET			YES	e
 --- EXTENDED
-tbl	1	blf_idx	\N	\N	\N	0	\N	\N	\N	BLOOM_FILTER			YES	d, b
-tbl	1	mm1_idx	\N	\N	\N	0	\N	\N	\N	MINMAX			YES	a, c, d
-tbl	1	mm2_idx	\N	\N	\N	0	\N	\N	\N	MINMAX			YES	c, d, e
-tbl	1	PRIMARY	\N	\N	A	0	\N	\N	\N	PRIMARY			YES	c, a
-tbl	1	set_idx	\N	\N	\N	0	\N	\N	\N	SET			YES	e
+tbl	1	blf_idx	1		\N	0	\N	\N	\N	BLOOM_FILTER			YES	d, b
+tbl	1	mm1_idx	1		\N	0	\N	\N	\N	MINMAX			YES	a, c, d
+tbl	1	mm2_idx	1		\N	0	\N	\N	\N	MINMAX			YES	c, d, e
+tbl	1	PRIMARY	1		A	0	\N	\N	\N	PRIMARY			YES	c, a
+tbl	1	set_idx	1		\N	0	\N	\N	\N	SET			YES	e
 --- WHERE
 --- Check with weird table names
-$4@^7	1	PRIMARY	\N	\N	A	0	\N	\N	\N	PRIMARY			YES	c
-NULL	1	PRIMARY	\N	\N	A	0	\N	\N	\N	PRIMARY			YES	c
-\'	1	PRIMARY	\N	\N	A	0	\N	\N	\N	PRIMARY			YES	c
-\'	1	PRIMARY	\N	\N	A	0	\N	\N	\N	PRIMARY			YES	c
+$4@^7	1	PRIMARY	1		A	0	\N	\N	\N	PRIMARY			YES	c
+NULL	1	PRIMARY	1		A	0	\N	\N	\N	PRIMARY			YES	c
+\'	1	PRIMARY	1		A	0	\N	\N	\N	PRIMARY			YES	c
+\'	1	PRIMARY	1		A	0	\N	\N	\N	PRIMARY			YES	c
 --- Original table
-tbl	1	blf_idx	\N	\N	\N	0	\N	\N	\N	BLOOM_FILTER			YES	d, b
-tbl	1	mm1_idx	\N	\N	\N	0	\N	\N	\N	MINMAX			YES	a, c, d
-tbl	1	mm2_idx	\N	\N	\N	0	\N	\N	\N	MINMAX			YES	c, d, e
-tbl	1	PRIMARY	\N	\N	A	0	\N	\N	\N	PRIMARY			YES	c, a
-tbl	1	set_idx	\N	\N	\N	0	\N	\N	\N	SET			YES	e
+tbl	1	blf_idx	1		\N	0	\N	\N	\N	BLOOM_FILTER			YES	d, b
+tbl	1	mm1_idx	1		\N	0	\N	\N	\N	MINMAX			YES	a, c, d
+tbl	1	mm2_idx	1		\N	0	\N	\N	\N	MINMAX			YES	c, d, e
+tbl	1	PRIMARY	1		A	0	\N	\N	\N	PRIMARY			YES	c, a
+tbl	1	set_idx	1		\N	0	\N	\N	\N	SET			YES	e
 --- Equally named table in other database
-tbl	1	mmi_idx	\N	\N	\N	0	\N	\N	\N	MINMAX			YES	b
-tbl	1	PRIMARY	\N	\N	A	0	\N	\N	\N	PRIMARY			YES	a
+tbl	1	mmi_idx	1		\N	0	\N	\N	\N	MINMAX			YES	b
+tbl	1	PRIMARY	1		A	0	\N	\N	\N	PRIMARY			YES	a
 --- Short form
-tbl	1	mmi_idx	\N	\N	\N	0	\N	\N	\N	MINMAX			YES	b
-tbl	1	PRIMARY	\N	\N	A	0	\N	\N	\N	PRIMARY			YES	a
+tbl	1	mmi_idx	1		\N	0	\N	\N	\N	MINMAX			YES	b
+tbl	1	PRIMARY	1		A	0	\N	\N	\N	PRIMARY			YES	a

From f9a3856715a0de7ad8bb1136b8d22ded277bafd4 Mon Sep 17 00:00:00 2001
From: Robert Schulze <robert@clickhouse.com>
Date: Tue, 4 Jul 2023 21:02:30 +0000
Subject: [PATCH 043/242] Poor man's tuple parsing

---
 .../InterpreterShowIndexesQuery.cpp           | 23 +++++-
 .../0_stateless/02724_show_indexes.reference  | 82 ++++++++++---------
 2 files changed, 63 insertions(+), 42 deletions(-)

diff --git a/src/Interpreters/InterpreterShowIndexesQuery.cpp b/src/Interpreters/InterpreterShowIndexesQuery.cpp
index 5aafc22389f..2f65cc3ec3a 100644
--- a/src/Interpreters/InterpreterShowIndexesQuery.cpp
+++ b/src/Interpreters/InterpreterShowIndexesQuery.cpp
@@ -42,8 +42,8 @@ FROM (
             name AS table,
             1 AS non_unique,
             'PRIMARY' AS key_name,
-            1 AS seq_in_index,
-            '' AS column_name,
+            arrayJoin(splitByString(', ', primary_key)) AS column_name,
+            row_number() over (order by column_name) AS seq_in_index,
             'A' AS collation,
             0 AS cardinality,
             NULL AS sub_part,
@@ -53,7 +53,7 @@ FROM (
             '' AS comment,
             '' AS index_comment,
             'YES' AS visible,
-            primary_key AS expression
+            '' AS expression
         FROM system.tables
         WHERE
             database = '{0}'
@@ -63,8 +63,8 @@ FROM (
             table AS table,
             1 AS non_unique,
             name AS key_name,
-            1 AS seq_in_index,
             '' AS column_name,
+            1 AS seq_in_index,
             NULL AS collation,
             0 AS cardinality,
             NULL AS sub_part,
@@ -86,6 +86,21 @@ ORDER BY index_type, expression;)", database, table, where_expression);
     /// sort the output of SHOW INDEXES otherwise (SELECT * FROM (SHOW INDEXES ...) ORDER BY ...) is rejected) and 3. some
     /// SQL tests can take advantage of this.
 
+    /// Note about compatibility of fields 'column_name', 'seq_in_index' and 'expression' with MySQL:
+    /// MySQL has non-functional and functional indexes.
+    /// - Non-functional indexes only reference columns, e.g. 'col1, col2'. In this case, `SHOW INDEX` produces as many result rows as there
+    ///   are indexed columns. 'column_name' and 'seq_in_index' (an ascending integer 1, 2, ...) are filled, 'expression' is empty.
+    /// - Functional indexes can reference arbitrary expressions, e.g. 'col1 + 1, concat(col2, col3)'. 'SHOW INDEX' produces a single row
+    ///   with `column_name` and `seq_in_index` empty and `expression` filled with the entire index expression. Only non-primary-key indexes
+    ///   can be functional indexes.
+    /// Above SELECT tries to emulate that. Caveats:
+    /// 1. The primary key index sub-SELECT assumes the primary key expression is non-functional. Functional primary key indexes in
+    ///    ClickHouse are possible but quite obscure. In MySQL they are not possible at all.
+    /// 2. Related to 1.: Poor man's tuple parsing with splitByString() in the PK sub-SELECT messes up for functional primary key index
+    ///    expressions where the comma is not only used as separator between tuple components, e.g. in 'col1 + 1, concat(col2, col3)'.
+    /// 3. The data skipping index sub-SELECT assumes the index expression is functional. 3rd party tools that expect MySQL semantics from
+    ///    SHOW INDEX will probably not care as MySQL has no skipping indexes and they only use the result to figure out the primary key.
+
     return rewritten_query;
 }
 
diff --git a/tests/queries/0_stateless/02724_show_indexes.reference b/tests/queries/0_stateless/02724_show_indexes.reference
index c5b7883e17e..063105e3332 100644
--- a/tests/queries/0_stateless/02724_show_indexes.reference
+++ b/tests/queries/0_stateless/02724_show_indexes.reference
@@ -1,45 +1,51 @@
 --- Aliases of SHOW INDEX
-tbl	1	blf_idx	1		\N	0	\N	\N	\N	BLOOM_FILTER			YES	d, b
-tbl	1	mm1_idx	1		\N	0	\N	\N	\N	MINMAX			YES	a, c, d
-tbl	1	mm2_idx	1		\N	0	\N	\N	\N	MINMAX			YES	c, d, e
-tbl	1	PRIMARY	1		A	0	\N	\N	\N	PRIMARY			YES	c, a
-tbl	1	set_idx	1		\N	0	\N	\N	\N	SET			YES	e
-tbl	1	blf_idx	1		\N	0	\N	\N	\N	BLOOM_FILTER			YES	d, b
-tbl	1	mm1_idx	1		\N	0	\N	\N	\N	MINMAX			YES	a, c, d
-tbl	1	mm2_idx	1		\N	0	\N	\N	\N	MINMAX			YES	c, d, e
-tbl	1	PRIMARY	1		A	0	\N	\N	\N	PRIMARY			YES	c, a
-tbl	1	set_idx	1		\N	0	\N	\N	\N	SET			YES	e
-tbl	1	blf_idx	1		\N	0	\N	\N	\N	BLOOM_FILTER			YES	d, b
-tbl	1	mm1_idx	1		\N	0	\N	\N	\N	MINMAX			YES	a, c, d
-tbl	1	mm2_idx	1		\N	0	\N	\N	\N	MINMAX			YES	c, d, e
-tbl	1	PRIMARY	1		A	0	\N	\N	\N	PRIMARY			YES	c, a
-tbl	1	set_idx	1		\N	0	\N	\N	\N	SET			YES	e
-tbl	1	blf_idx	1		\N	0	\N	\N	\N	BLOOM_FILTER			YES	d, b
-tbl	1	mm1_idx	1		\N	0	\N	\N	\N	MINMAX			YES	a, c, d
-tbl	1	mm2_idx	1		\N	0	\N	\N	\N	MINMAX			YES	c, d, e
-tbl	1	PRIMARY	1		A	0	\N	\N	\N	PRIMARY			YES	c, a
-tbl	1	set_idx	1		\N	0	\N	\N	\N	SET			YES	e
+tbl	1	blf_idx		1	\N	0	\N	\N	\N	BLOOM_FILTER			YES	d, b
+tbl	1	mm1_idx		1	\N	0	\N	\N	\N	MINMAX			YES	a, c, d
+tbl	1	mm2_idx		1	\N	0	\N	\N	\N	MINMAX			YES	c, d, e
+tbl	1	PRIMARY	a	1	A	0	\N	\N	\N	PRIMARY			YES	
+tbl	1	PRIMARY	c	2	A	0	\N	\N	\N	PRIMARY			YES	
+tbl	1	set_idx		1	\N	0	\N	\N	\N	SET			YES	e
+tbl	1	blf_idx		1	\N	0	\N	\N	\N	BLOOM_FILTER			YES	d, b
+tbl	1	mm1_idx		1	\N	0	\N	\N	\N	MINMAX			YES	a, c, d
+tbl	1	mm2_idx		1	\N	0	\N	\N	\N	MINMAX			YES	c, d, e
+tbl	1	PRIMARY	a	1	A	0	\N	\N	\N	PRIMARY			YES	
+tbl	1	PRIMARY	c	2	A	0	\N	\N	\N	PRIMARY			YES	
+tbl	1	set_idx		1	\N	0	\N	\N	\N	SET			YES	e
+tbl	1	blf_idx		1	\N	0	\N	\N	\N	BLOOM_FILTER			YES	d, b
+tbl	1	mm1_idx		1	\N	0	\N	\N	\N	MINMAX			YES	a, c, d
+tbl	1	mm2_idx		1	\N	0	\N	\N	\N	MINMAX			YES	c, d, e
+tbl	1	PRIMARY	a	1	A	0	\N	\N	\N	PRIMARY			YES	
+tbl	1	PRIMARY	c	2	A	0	\N	\N	\N	PRIMARY			YES	
+tbl	1	set_idx		1	\N	0	\N	\N	\N	SET			YES	e
+tbl	1	blf_idx		1	\N	0	\N	\N	\N	BLOOM_FILTER			YES	d, b
+tbl	1	mm1_idx		1	\N	0	\N	\N	\N	MINMAX			YES	a, c, d
+tbl	1	mm2_idx		1	\N	0	\N	\N	\N	MINMAX			YES	c, d, e
+tbl	1	PRIMARY	c	2	A	0	\N	\N	\N	PRIMARY			YES	
+tbl	1	PRIMARY	a	1	A	0	\N	\N	\N	PRIMARY			YES	
+tbl	1	set_idx		1	\N	0	\N	\N	\N	SET			YES	e
 --- EXTENDED
-tbl	1	blf_idx	1		\N	0	\N	\N	\N	BLOOM_FILTER			YES	d, b
-tbl	1	mm1_idx	1		\N	0	\N	\N	\N	MINMAX			YES	a, c, d
-tbl	1	mm2_idx	1		\N	0	\N	\N	\N	MINMAX			YES	c, d, e
-tbl	1	PRIMARY	1		A	0	\N	\N	\N	PRIMARY			YES	c, a
-tbl	1	set_idx	1		\N	0	\N	\N	\N	SET			YES	e
+tbl	1	blf_idx		1	\N	0	\N	\N	\N	BLOOM_FILTER			YES	d, b
+tbl	1	mm1_idx		1	\N	0	\N	\N	\N	MINMAX			YES	a, c, d
+tbl	1	mm2_idx		1	\N	0	\N	\N	\N	MINMAX			YES	c, d, e
+tbl	1	PRIMARY	a	1	A	0	\N	\N	\N	PRIMARY			YES	
+tbl	1	PRIMARY	c	2	A	0	\N	\N	\N	PRIMARY			YES	
+tbl	1	set_idx		1	\N	0	\N	\N	\N	SET			YES	e
 --- WHERE
 --- Check with weird table names
-$4@^7	1	PRIMARY	1		A	0	\N	\N	\N	PRIMARY			YES	c
-NULL	1	PRIMARY	1		A	0	\N	\N	\N	PRIMARY			YES	c
-\'	1	PRIMARY	1		A	0	\N	\N	\N	PRIMARY			YES	c
-\'	1	PRIMARY	1		A	0	\N	\N	\N	PRIMARY			YES	c
+$4@^7	1	PRIMARY	c	1	A	0	\N	\N	\N	PRIMARY			YES	
+NULL	1	PRIMARY	c	1	A	0	\N	\N	\N	PRIMARY			YES	
+\'	1	PRIMARY	c	1	A	0	\N	\N	\N	PRIMARY			YES	
+\'	1	PRIMARY	c	1	A	0	\N	\N	\N	PRIMARY			YES	
 --- Original table
-tbl	1	blf_idx	1		\N	0	\N	\N	\N	BLOOM_FILTER			YES	d, b
-tbl	1	mm1_idx	1		\N	0	\N	\N	\N	MINMAX			YES	a, c, d
-tbl	1	mm2_idx	1		\N	0	\N	\N	\N	MINMAX			YES	c, d, e
-tbl	1	PRIMARY	1		A	0	\N	\N	\N	PRIMARY			YES	c, a
-tbl	1	set_idx	1		\N	0	\N	\N	\N	SET			YES	e
+tbl	1	blf_idx		1	\N	0	\N	\N	\N	BLOOM_FILTER			YES	d, b
+tbl	1	mm1_idx		1	\N	0	\N	\N	\N	MINMAX			YES	a, c, d
+tbl	1	mm2_idx		1	\N	0	\N	\N	\N	MINMAX			YES	c, d, e
+tbl	1	PRIMARY	c	2	A	0	\N	\N	\N	PRIMARY			YES	
+tbl	1	PRIMARY	a	1	A	0	\N	\N	\N	PRIMARY			YES	
+tbl	1	set_idx		1	\N	0	\N	\N	\N	SET			YES	e
 --- Equally named table in other database
-tbl	1	mmi_idx	1		\N	0	\N	\N	\N	MINMAX			YES	b
-tbl	1	PRIMARY	1		A	0	\N	\N	\N	PRIMARY			YES	a
+tbl	1	mmi_idx		1	\N	0	\N	\N	\N	MINMAX			YES	b
+tbl	1	PRIMARY	a	1	A	0	\N	\N	\N	PRIMARY			YES	
 --- Short form
-tbl	1	mmi_idx	1		\N	0	\N	\N	\N	MINMAX			YES	b
-tbl	1	PRIMARY	1		A	0	\N	\N	\N	PRIMARY			YES	a
+tbl	1	mmi_idx		1	\N	0	\N	\N	\N	MINMAX			YES	b
+tbl	1	PRIMARY	a	1	A	0	\N	\N	\N	PRIMARY			YES	

From 2460268e3c260254021902f57e0e21e40d8d9d29 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@clickhouse.com>
Date: Tue, 4 Jul 2023 23:22:08 +0200
Subject: [PATCH 044/242] Remove templates

---
 src/Functions/GregorianDate.cpp         | 272 ++++++++++++++
 src/Functions/GregorianDate.h           | 481 +++++-------------------
 src/Functions/fromModifiedJulianDay.cpp |   5 +-
 src/Functions/toModifiedJulianDay.cpp   |   8 +-
 4 files changed, 376 insertions(+), 390 deletions(-)
 create mode 100644 src/Functions/GregorianDate.cpp

diff --git a/src/Functions/GregorianDate.cpp b/src/Functions/GregorianDate.cpp
new file mode 100644
index 00000000000..0f8a95ff3e7
--- /dev/null
+++ b/src/Functions/GregorianDate.cpp
@@ -0,0 +1,272 @@
+#include <Functions/GregorianDate.h>
+
+#include <Common/Exception.h>
+#include <Core/Types.h>
+#include <IO/ReadBuffer.h>
+#include <IO/ReadHelpers.h>
+#include <IO/WriteBufferFromString.h>
+#include <IO/WriteHelpers.h>
+
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+    extern const int CANNOT_PARSE_INPUT_ASSERTION_FAILED;
+    extern const int CANNOT_PARSE_DATE;
+    extern const int CANNOT_FORMAT_DATETIME;
+    extern const int LOGICAL_ERROR;
+}
+
+namespace gd
+{
+    static inline constexpr bool is_leap_year(int32_t year)
+    {
+        return (year % 4 == 0) && ((year % 400 == 0) || (year % 100 != 0));
+    }
+
+    static inline constexpr uint8_t monthLength(bool is_leap_year, uint8_t month)
+    {
+        switch (month)
+        {
+        case  1: return 31;
+        case  2: return is_leap_year ? 29 : 28;
+        case  3: return 31;
+        case  4: return 30;
+        case  5: return 31;
+        case  6: return 30;
+        case  7: return 31;
+        case  8: return 31;
+        case  9: return 30;
+        case 10: return 31;
+        case 11: return 30;
+        case 12: return 31;
+        default:
+            std::terminate();
+        }
+    }
+
+    /** Integer division truncated toward negative infinity.
+      */
+    template <typename I, typename J>
+    static inline constexpr I div(I x, J y)
+    {
+        const auto y_cast = static_cast<I>(y);
+        if (x > 0 && y_cast < 0)
+            return ((x - 1) / y_cast) - 1;
+        else if (x < 0 && y_cast > 0)
+            return ((x + 1) / y_cast) - 1;
+        else
+            return x / y_cast;
+    }
+
+    /** Integer modulus, satisfying div(x, y)*y + mod(x, y) == x.
+      */
+    template <typename I, typename J>
+    static inline constexpr I mod(I x, J y)
+    {
+        const auto y_cast = static_cast<I>(y);
+        const auto r = x % y_cast;
+        if ((x > 0 && y_cast < 0) || (x < 0 && y_cast > 0))
+            return r == 0 ? static_cast<I>(0) : r + y_cast;
+        else
+            return r;
+    }
+
+    /** Like std::min(), but the type of operands may differ.
+      */
+    template <typename I, typename J>
+    static inline constexpr I min(I x, J y)
+    {
+        const auto y_cast = static_cast<I>(y);
+        return x < y_cast ? x : y_cast;
+    }
+
+    static inline char readDigit(ReadBuffer & in)
+    {
+        char c;
+        if (!in.read(c))
+            throw Exception(ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED, "Cannot parse input: expected a digit at the end of stream");
+        else if (c < '0' || c > '9')
+            throw Exception(ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED, "Cannot read input: expected a digit but got something else");
+        else
+            return c - '0';
+    }
+}
+
+GregorianDate::GregorianDate(ReadBuffer & in)
+{
+    year_ = gd::readDigit(in) * 1000
+          + gd::readDigit(in) * 100
+          + gd::readDigit(in) * 10
+          + gd::readDigit(in);
+
+    assertChar('-', in);
+
+    month_ = gd::readDigit(in) * 10
+           + gd::readDigit(in);
+
+    assertChar('-', in);
+
+    day_of_month_ = gd::readDigit(in) * 10
+                + gd::readDigit(in);
+
+    assertEOF(in);
+
+    if (month_ < 1 || month_ > 12 || day_of_month_ < 1 || day_of_month_ > gd::monthLength(gd::is_leap_year(year_), month_))
+        throw Exception(ErrorCodes::CANNOT_PARSE_DATE, "Invalid date: {}", toString());
+}
+
+GregorianDate::GregorianDate(int64_t modified_julian_day)
+{
+    const OrdinalDate ord(modified_julian_day);
+    const MonthDay md(gd::is_leap_year(ord.year()), ord.dayOfYear());
+
+    year_       = ord.year();
+    month_      = md.month();
+    day_of_month_ = md.dayOfMonth();
+}
+
+int64_t GregorianDate::toModifiedJulianDay() const
+{
+    const MonthDay md(month_, day_of_month_);
+    const auto day_of_year = md.dayOfYear(gd::is_leap_year(year_));
+    const OrdinalDate ord(year_, day_of_year);
+    return ord.toModifiedJulianDay();
+}
+
+template <typename ReturnType>
+ReturnType GregorianDate::writeImpl(WriteBuffer & buf) const
+{
+    if (year_ < 0 || year_ > 9999)
+    {
+        if constexpr (std::is_same_v<ReturnType, void>)
+            throw Exception(ErrorCodes::CANNOT_FORMAT_DATETIME,
+                "Impossible to stringify: year too big or small: {}", DB::toString(year_));
+        else
+            return false;
+    }
+    else
+    {
+        auto y = year_;
+        writeChar('0' + y / 1000, buf); y %= 1000;
+        writeChar('0' + y /  100, buf); y %=  100;
+        writeChar('0' + y /   10, buf); y %=   10;
+        writeChar('0' + y       , buf);
+
+        writeChar('-', buf);
+
+        auto m = month_;
+        writeChar('0' + m / 10, buf); m %= 10;
+        writeChar('0' + m     , buf);
+
+        writeChar('-', buf);
+
+        auto d = day_of_month_;
+        writeChar('0' + d / 10, buf); d %= 10;
+        writeChar('0' + d     , buf);
+    }
+
+    return ReturnType(true);
+}
+
+std::string GregorianDate::toString() const
+{
+    WriteBufferFromOwnString buf;
+    write(buf);
+    return buf.str();
+}
+
+OrdinalDate::OrdinalDate(int32_t year, uint16_t day_of_year)
+    : year_(year)
+    , day_of_year_(day_of_year)
+{
+    if (day_of_year < 1 || day_of_year > (gd::is_leap_year(year) ? 366 : 365))
+    {
+        throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid ordinal date: {}-{}", toString(year), toString(day_of_year));
+    }
+}
+
+OrdinalDate::OrdinalDate(int64_t modified_julian_day)
+{
+    /// This function supports day number from -678941 to 2973119 (which represent 0000-01-01 and 9999-12-31 respectively).
+
+    if (modified_julian_day < -678941)
+        throw Exception(
+            ErrorCodes::CANNOT_FORMAT_DATETIME,
+            "Value cannot be represented as date because it's out of range");
+
+    if (modified_julian_day > 2973119)
+        throw Exception(
+            ErrorCodes::CANNOT_FORMAT_DATETIME,
+            "Value cannot be represented as date because it's out of range");
+
+    const auto a         = modified_julian_day + 678575;
+    const auto quad_cent = gd::div(a, 146097);
+    const auto b         = gd::mod(a, 146097);
+    const auto cent      = gd::min(gd::div(b, 36524), 3);
+    const auto c         = b - cent * 36524;
+    const auto quad      = gd::div(c, 1461);
+    const auto d         = gd::mod(c, 1461);
+    const auto y         = gd::min(gd::div(d, 365), 3);
+
+    day_of_year_ = d - y * 365 + 1;
+    year_ = static_cast<int32_t>(quad_cent * 400 + cent * 100 + quad * 4 + y + 1);
+}
+
+int64_t OrdinalDate::toModifiedJulianDay() const noexcept
+{
+    const auto y = year_ - 1;
+    return day_of_year_
+        + 365 * y
+        + gd::div(y, 4)
+        - gd::div(y, 100)
+        + gd::div(y, 400)
+        - 678576;
+}
+
+MonthDay::MonthDay(uint8_t month, uint8_t day_of_month)
+    : month_(month)
+    , day_of_month_(day_of_month)
+{
+    if (month < 1 || month > 12)
+        throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid month: {}", DB::toString(month));
+    /* We can't validate day_of_month here, because we don't know if
+     * it's a leap year. */
+}
+
+MonthDay::MonthDay(bool is_leap_year, uint16_t day_of_year)
+{
+    if (day_of_year < 1 || day_of_year > (is_leap_year ? 366 : 365))
+        throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid day of year: {}{}",
+                        (is_leap_year ? "leap, " : "non-leap, "), DB::toString(day_of_year));
+
+    month_ = 1;
+    uint16_t d = day_of_year;
+    while (true)
+    {
+        const auto len = gd::monthLength(is_leap_year, month_);
+        if (d <= len)
+            break;
+        month_++;
+        d -= len;
+    }
+    day_of_month_ = d;
+}
+
+uint16_t MonthDay::dayOfYear(bool is_leap_year) const
+{
+    if (day_of_month_ < 1 || day_of_month_ > gd::monthLength(is_leap_year, month_))
+    {
+        throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid day of month: {}{}-{}",
+            (is_leap_year ? "leap, " : "non-leap, "), DB::toString(month_), DB::toString(day_of_month_));
+    }
+    const auto k = month_ <= 2 ? 0 : is_leap_year ? -1 :-2;
+    return (367 * month_ - 362) / 12 + k + day_of_month_;
+}
+
+template void GregorianDate::writeImpl<void>(WriteBuffer & buf) const;
+template bool GregorianDate::writeImpl<bool>(WriteBuffer & buf) const;
+
+}
diff --git a/src/Functions/GregorianDate.h b/src/Functions/GregorianDate.h
index 16fcb5ea061..4a0cbec5afe 100644
--- a/src/Functions/GregorianDate.h
+++ b/src/Functions/GregorianDate.h
@@ -1,425 +1,138 @@
 #pragma once
 
-#include <base/extended_types.h>
-#include <Common/Exception.h>
 #include <Core/Types.h>
-#include <IO/ReadBuffer.h>
-#include <IO/ReadHelpers.h>
-#include <IO/WriteBufferFromString.h>
-#include <IO/WriteHelpers.h>
-
-#include <cstdint>
 
 
 namespace DB
 {
-    namespace ErrorCodes
-    {
-        extern const int CANNOT_PARSE_INPUT_ASSERTION_FAILED;
-        extern const int CANNOT_PARSE_DATE;
-        extern const int CANNOT_FORMAT_DATETIME;
-        extern const int LOGICAL_ERROR;
-    }
 
-    /** Proleptic Gregorian calendar date. YearT is an integral type
+class ReadBuffer;
+class WriteBuffer;
+
+/// Proleptic Gregorian calendar date.
+class GregorianDate
+{
+public:
+    /** Construct from date in text form 'YYYY-MM-DD' by reading from
+      * ReadBuffer.
+      */
+    explicit GregorianDate(ReadBuffer & in);
+
+    /** Construct from Modified Julian Day. The type T is an
+      * integral type which should be at least 32 bits wide, and
+      * should preferably signed.
+      */
+    explicit GregorianDate(int64_t modified_julian_day);
+
+    /** Convert to Modified Julian Day. The type T is an integral type
       * which should be at least 32 bits wide, and should preferably
-      * be signed.
-     */
-    template <typename YearT = int32_t>
-    class GregorianDate
-    {
-    public:
-        /** Construct from date in text form 'YYYY-MM-DD' by reading from
-          * ReadBuffer.
-          */
-        explicit GregorianDate(ReadBuffer & in);
-
-        /** Construct from Modified Julian Day. The type T is an
-          * integral type which should be at least 32 bits wide, and
-          * should preferably signed.
-          */
-        explicit GregorianDate(is_integer auto modified_julian_day);
-
-        /** Convert to Modified Julian Day. The type T is an integral type
-          * which should be at least 32 bits wide, and should preferably
-          * signed.
-          */
-        template <is_integer T>
-        T toModifiedJulianDay() const;
-
-        /** Write the date in text form 'YYYY-MM-DD' to a buffer.
-          */
-        void write(WriteBuffer & buf) const
-        {
-            writeImpl<void>(buf);
-        }
-
-        bool tryWrite(WriteBuffer & buf) const
-        {
-            return writeImpl<bool>(buf);
-        }
-
-        /** Convert to a string in text form 'YYYY-MM-DD'.
-          */
-        std::string toString() const;
-
-        YearT year() const noexcept
-        {
-            return year_;
-        }
-
-        uint8_t month() const noexcept
-        {
-            return month_;
-        }
-
-        uint8_t dayOfMonth() const noexcept
-        {
-            return day_of_month_;
-        }
-
-    private:
-        YearT year_ = 0;
-        uint8_t month_ = 0;
-        uint8_t day_of_month_ = 0;
-
-        template <typename ReturnType>
-        ReturnType writeImpl(WriteBuffer & buf) const;
-    };
-
-    /** ISO 8601 Ordinal Date. YearT is an integral type which should
-      * be at least 32 bits wide, and should preferably signed.
-     */
-    template <typename YearT = int32_t>
-    class OrdinalDate
-    {
-    public:
-        OrdinalDate(YearT year, uint16_t day_of_year);
-
-        /** Construct from Modified Julian Day. The type T is an
-          * integral type which should be at least 32 bits wide, and
-          * should preferably signed.
-          */
-        template <is_integer DayT>
-        explicit OrdinalDate(DayT modified_julian_day);
-
-        /** Convert to Modified Julian Day. The type T is an integral
-          * type which should be at least 32 bits wide, and should
-          * preferably be signed.
-          */
-        template <is_integer T>
-        T toModifiedJulianDay() const noexcept;
-
-        YearT year() const noexcept
-        {
-            return year_;
-        }
-
-        uint16_t dayOfYear() const noexcept
-        {
-            return day_of_year_;
-        }
-
-    private:
-        YearT year_ = 0;
-        uint16_t day_of_year_ = 0;
-    };
-
-    class MonthDay
-    {
-    public:
-        /** Construct from month and day. */
-        MonthDay(uint8_t month, uint8_t day_of_month);
-
-        /** Construct from day of year in Gregorian or Julian
-          * calendars to month and day.
-          */
-        MonthDay(bool is_leap_year, uint16_t day_of_year);
-
-        /** Convert month and day in Gregorian or Julian calendars to
-          * day of year.
-          */
-        uint16_t dayOfYear(bool is_leap_year) const;
-
-        uint8_t month() const noexcept
-        {
-            return month_;
-        }
-
-        uint8_t dayOfMonth() const noexcept
-        {
-            return day_of_month_;
-        }
-
-    private:
-        uint8_t month_ = 0;
-        uint8_t day_of_month_ = 0;
-    };
-}
-
-
-namespace gd
-{
-    using namespace DB;
-
-    template <typename YearT>
-    static inline constexpr bool is_leap_year(YearT year)
-    {
-        return (year % 4 == 0) && ((year % 400 == 0) || (year % 100 != 0));
-    }
-
-    static inline constexpr uint8_t monthLength(bool is_leap_year, uint8_t month)
-    {
-        switch (month)
-        {
-        case  1: return 31;
-        case  2: return is_leap_year ? 29 : 28;
-        case  3: return 31;
-        case  4: return 30;
-        case  5: return 31;
-        case  6: return 30;
-        case  7: return 31;
-        case  8: return 31;
-        case  9: return 30;
-        case 10: return 31;
-        case 11: return 30;
-        case 12: return 31;
-        default:
-            std::terminate();
-        }
-    }
-
-    /** Integer division truncated toward negative infinity.
+      * signed.
       */
-    template <typename I, typename J>
-    static inline constexpr I div(I x, J y)
-    {
-        const auto y_cast = static_cast<I>(y);
-        if (x > 0 && y_cast < 0)
-            return ((x - 1) / y_cast) - 1;
-        else if (x < 0 && y_cast > 0)
-            return ((x + 1) / y_cast) - 1;
-        else
-            return x / y_cast;
-    }
+    int64_t toModifiedJulianDay() const;
 
-    /** Integer modulus, satisfying div(x, y)*y + mod(x, y) == x.
+    /** Write the date in text form 'YYYY-MM-DD' to a buffer.
       */
-    template <typename I, typename J>
-    static inline constexpr I mod(I x, J y)
+    void write(WriteBuffer & buf) const
     {
-        const auto y_cast = static_cast<I>(y);
-        const auto r = x % y_cast;
-        if ((x > 0 && y_cast < 0) || (x < 0 && y_cast > 0))
-            return r == 0 ? static_cast<I>(0) : r + y_cast;
-        else
-            return r;
+        writeImpl<void>(buf);
     }
 
-    /** Like std::min(), but the type of operands may differ.
+    bool tryWrite(WriteBuffer & buf) const
+    {
+        return writeImpl<bool>(buf);
+    }
+
+    /** Convert to a string in text form 'YYYY-MM-DD'.
       */
-    template <typename I, typename J>
-    static inline constexpr I min(I x, J y)
+    std::string toString() const;
+
+    int32_t year() const noexcept
     {
-        const auto y_cast = static_cast<I>(y);
-        return x < y_cast ? x : y_cast;
+        return year_;
     }
 
-    static inline char readDigit(ReadBuffer & in)
+    uint8_t month() const noexcept
     {
-        char c;
-        if (!in.read(c))
-            throw Exception(ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED, "Cannot parse input: expected a digit at the end of stream");
-        else if (c < '0' || c > '9')
-            throw Exception(ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED, "Cannot read input: expected a digit but got something else");
-        else
-            return c - '0';
-    }
-}
-
-namespace DB
-{
-    template <typename YearT>
-    GregorianDate<YearT>::GregorianDate(ReadBuffer & in)
-    {
-        year_ = gd::readDigit(in) * 1000
-              + gd::readDigit(in) * 100
-              + gd::readDigit(in) * 10
-              + gd::readDigit(in);
-
-        assertChar('-', in);
-
-        month_ = gd::readDigit(in) * 10
-               + gd::readDigit(in);
-
-        assertChar('-', in);
-
-        day_of_month_ = gd::readDigit(in) * 10
-                    + gd::readDigit(in);
-
-        assertEOF(in);
-
-        if (month_ < 1 || month_ > 12 || day_of_month_ < 1 || day_of_month_ > gd::monthLength(gd::is_leap_year(year_), month_))
-            throw Exception(ErrorCodes::CANNOT_PARSE_DATE, "Invalid date: {}", toString());
+        return month_;
     }
 
-    template <typename YearT>
-    GregorianDate<YearT>::GregorianDate(is_integer auto modified_julian_day)
+    uint8_t dayOfMonth() const noexcept
     {
-        const OrdinalDate<YearT> ord(modified_julian_day);
-        const MonthDay md(gd::is_leap_year(ord.year()), ord.dayOfYear());
-
-        year_       = ord.year();
-        month_      = md.month();
-        day_of_month_ = md.dayOfMonth();
+        return day_of_month_;
     }
 
-    template <typename YearT>
-    template <is_integer T>
-    T GregorianDate<YearT>::toModifiedJulianDay() const
-    {
-        const MonthDay md(month_, day_of_month_);
-        const auto day_of_year = md.dayOfYear(gd::is_leap_year(year_));
-        const OrdinalDate<YearT> ord(year_, day_of_year);
-        return ord.template toModifiedJulianDay<T>();
-    }
+private:
+    int32_t year_ = 0;
+    uint8_t month_ = 0;
+    uint8_t day_of_month_ = 0;
 
-    template <typename YearT>
     template <typename ReturnType>
-    ReturnType GregorianDate<YearT>::writeImpl(WriteBuffer & buf) const
+    ReturnType writeImpl(WriteBuffer & buf) const;
+};
+
+/** ISO 8601 Ordinal Date.
+ */
+class OrdinalDate
+{
+public:
+    OrdinalDate(int32_t year, uint16_t day_of_year);
+
+    /** Construct from Modified Julian Day. The type T is an
+      * integral type which should be at least 32 bits wide, and
+      * should preferably signed.
+      */
+    explicit OrdinalDate(int64_t modified_julian_day);
+
+    /** Convert to Modified Julian Day. The type T is an integral
+      * type which should be at least 32 bits wide, and should
+      * preferably be signed.
+      */
+    int64_t toModifiedJulianDay() const noexcept;
+
+    int32_t year() const noexcept
     {
-        if (year_ < 0 || year_ > 9999)
-        {
-            if constexpr (std::is_same_v<ReturnType, void>)
-                throw Exception(ErrorCodes::CANNOT_FORMAT_DATETIME,
-                    "Impossible to stringify: year too big or small: {}", DB::toString(year_));
-            else
-                return false;
-        }
-        else
-        {
-            auto y = year_;
-            writeChar('0' + y / 1000, buf); y %= 1000;
-            writeChar('0' + y /  100, buf); y %=  100;
-            writeChar('0' + y /   10, buf); y %=   10;
-            writeChar('0' + y       , buf);
-
-            writeChar('-', buf);
-
-            auto m = month_;
-            writeChar('0' + m / 10, buf); m %= 10;
-            writeChar('0' + m     , buf);
-
-            writeChar('-', buf);
-
-            auto d = day_of_month_;
-            writeChar('0' + d / 10, buf); d %= 10;
-            writeChar('0' + d     , buf);
-        }
-
-        return ReturnType(true);
+        return year_;
     }
 
-    template <typename YearT>
-    std::string GregorianDate<YearT>::toString() const
+    uint16_t dayOfYear() const noexcept
     {
-        WriteBufferFromOwnString buf;
-        write(buf);
-        return buf.str();
+        return day_of_year_;
     }
 
-    template <typename YearT>
-    OrdinalDate<YearT>::OrdinalDate(YearT year, uint16_t day_of_year)
-        : year_(year)
-        , day_of_year_(day_of_year)
+private:
+    int32_t year_ = 0;
+    uint16_t day_of_year_ = 0;
+};
+
+class MonthDay
+{
+public:
+    /** Construct from month and day. */
+    MonthDay(uint8_t month, uint8_t day_of_month);
+
+    /** Construct from day of year in Gregorian or Julian
+      * calendars to month and day.
+      */
+    MonthDay(bool is_leap_year, uint16_t day_of_year);
+
+    /** Convert month and day in Gregorian or Julian calendars to
+      * day of year.
+      */
+    uint16_t dayOfYear(bool is_leap_year) const;
+
+    uint8_t month() const noexcept
     {
-        if (day_of_year < 1 || day_of_year > (gd::is_leap_year(year) ? 366 : 365))
-        {
-            throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid ordinal date: {}-{}", toString(year), toString(day_of_year));
-        }
+        return month_;
     }
 
-    template <typename YearT>
-    template <is_integer DayT>
-    OrdinalDate<YearT>::OrdinalDate(DayT modified_julian_day)
+    uint8_t dayOfMonth() const noexcept
     {
-        /// This function supports day number from -678941 to 2973119 (which represent 0000-01-01 and 9999-12-31 respectively).
-
-        if constexpr (is_signed_v<DayT> && std::numeric_limits<DayT>::lowest() < -678941)
-            if (modified_julian_day < -678941)
-                throw Exception(
-                    ErrorCodes::CANNOT_FORMAT_DATETIME,
-                    "Value cannot be represented as date because it's out of range");
-
-        if constexpr (std::numeric_limits<DayT>::max() > 2973119)
-            if (modified_julian_day > 2973119)
-                throw Exception(
-                    ErrorCodes::CANNOT_FORMAT_DATETIME,
-                    "Value cannot be represented as date because it's out of range");
-
-        const auto a         = modified_julian_day + 678575;
-        const auto quad_cent = gd::div(a, 146097);
-        const auto b         = gd::mod(a, 146097);
-        const auto cent      = gd::min(gd::div(b, 36524), 3);
-        const auto c         = b - cent * 36524;
-        const auto quad      = gd::div(c, 1461);
-        const auto d         = gd::mod(c, 1461);
-        const auto y         = gd::min(gd::div(d, 365), 3);
-
-        day_of_year_ = d - y * 365 + 1;
-        year_ = static_cast<YearT>(quad_cent * 400 + cent * 100 + quad * 4 + y + 1);
+        return day_of_month_;
     }
 
-    template <typename YearT>
-    template <is_integer T>
-    T OrdinalDate<YearT>::toModifiedJulianDay() const noexcept
-    {
-        const auto y = year_ - 1;
-        return day_of_year_
-            + 365 * y
-            + gd::div(y, 4)
-            - gd::div(y, 100)
-            + gd::div(y, 400)
-            - 678576;
-    }
+private:
+    uint8_t month_ = 0;
+    uint8_t day_of_month_ = 0;
+};
 
-    inline MonthDay::MonthDay(uint8_t month, uint8_t day_of_month)
-        : month_(month)
-        , day_of_month_(day_of_month)
-    {
-        if (month < 1 || month > 12)
-            throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid month: {}", DB::toString(month));
-        /* We can't validate day_of_month here, because we don't know if
-         * it's a leap year. */
-    }
-
-    inline MonthDay::MonthDay(bool is_leap_year, uint16_t day_of_year)
-    {
-        if (day_of_year < 1 || day_of_year > (is_leap_year ? 366 : 365))
-            throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid day of year: {}{}",
-                            (is_leap_year ? "leap, " : "non-leap, "), DB::toString(day_of_year));
-
-        month_ = 1;
-        uint16_t d = day_of_year;
-        while (true)
-        {
-            const auto len = gd::monthLength(is_leap_year, month_);
-            if (d <= len)
-                break;
-            month_++;
-            d -= len;
-        }
-        day_of_month_ = d;
-    }
-
-    inline uint16_t MonthDay::dayOfYear(bool is_leap_year) const
-    {
-        if (day_of_month_ < 1 || day_of_month_ > gd::monthLength(is_leap_year, month_))
-        {
-            throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid day of month: {}{}-{}",
-                (is_leap_year ? "leap, " : "non-leap, "), DB::toString(month_), DB::toString(day_of_month_));
-        }
-        const auto k = month_ <= 2 ? 0 : is_leap_year ? -1 :-2;
-        return (367 * month_ - 362) / 12 + k + day_of_month_;
-    }
 }
diff --git a/src/Functions/fromModifiedJulianDay.cpp b/src/Functions/fromModifiedJulianDay.cpp
index a7c2c04bf01..bad0696e503 100644
--- a/src/Functions/fromModifiedJulianDay.cpp
+++ b/src/Functions/fromModifiedJulianDay.cpp
@@ -13,6 +13,7 @@
 #include <IO/WriteBufferFromVector.h>
 #include <IO/WriteHelpers.h>
 
+
 namespace DB
 {
 
@@ -56,14 +57,14 @@ namespace DB
             {
                 if constexpr (nullOnErrors)
                 {
-                    const GregorianDate<> gd(vec_from[i]);
+                    const GregorianDate gd(vec_from[i]);
                     (*vec_null_map_to)[i] = gd.tryWrite(write_buffer);
                     writeChar(0, write_buffer);
                     offsets_to[i] = write_buffer.count();
                 }
                 else
                 {
-                    const GregorianDate<> gd(vec_from[i]);
+                    const GregorianDate gd(vec_from[i]);
                     gd.write(write_buffer);
                     writeChar(0, write_buffer);
                     offsets_to[i] = write_buffer.count();
diff --git a/src/Functions/toModifiedJulianDay.cpp b/src/Functions/toModifiedJulianDay.cpp
index 0d854bcc110..f800b279385 100644
--- a/src/Functions/toModifiedJulianDay.cpp
+++ b/src/Functions/toModifiedJulianDay.cpp
@@ -80,8 +80,8 @@ namespace DB
                 {
                     try
                     {
-                        const GregorianDate<> date(read_buffer);
-                        vec_to[i] = date.toModifiedJulianDay<typename ToDataType::FieldType>();
+                        const GregorianDate date(read_buffer);
+                        vec_to[i] = static_cast<typename ToDataType::FieldType>(date.toModifiedJulianDay());
                         vec_null_map_to[i] = false;
                     }
                     catch (const Exception & e)
@@ -97,8 +97,8 @@ namespace DB
                 }
                 else
                 {
-                    const GregorianDate<> date(read_buffer);
-                    vec_to[i] = date.toModifiedJulianDay<typename ToDataType::FieldType>();
+                    const GregorianDate date(read_buffer);
+                    vec_to[i] = static_cast<typename ToDataType::FieldType>(date.toModifiedJulianDay());
                 }
             }
 

From 2a6b5e4ec6134e5c6451301ddcfa5d6acd949567 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@clickhouse.com>
Date: Tue, 4 Jul 2023 23:28:45 +0200
Subject: [PATCH 045/242] Fixed bad code

---
 src/Functions/GregorianDate.cpp | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/Functions/GregorianDate.cpp b/src/Functions/GregorianDate.cpp
index 0f8a95ff3e7..38ed3e2ddf8 100644
--- a/src/Functions/GregorianDate.cpp
+++ b/src/Functions/GregorianDate.cpp
@@ -115,7 +115,7 @@ GregorianDate::GregorianDate(ReadBuffer & in)
     assertEOF(in);
 
     if (month_ < 1 || month_ > 12 || day_of_month_ < 1 || day_of_month_ > gd::monthLength(gd::is_leap_year(year_), month_))
-        throw Exception(ErrorCodes::CANNOT_PARSE_DATE, "Invalid date: {}", toString());
+        throw Exception(ErrorCodes::CANNOT_PARSE_DATE, "Invalid date");
 }
 
 GregorianDate::GregorianDate(int64_t modified_julian_day)
@@ -143,7 +143,7 @@ ReturnType GregorianDate::writeImpl(WriteBuffer & buf) const
     {
         if constexpr (std::is_same_v<ReturnType, void>)
             throw Exception(ErrorCodes::CANNOT_FORMAT_DATETIME,
-                "Impossible to stringify: year too big or small: {}", DB::toString(year_));
+                "Impossible to stringify: year too big or small: {}", year_);
         else
             return false;
     }
@@ -231,7 +231,7 @@ MonthDay::MonthDay(uint8_t month, uint8_t day_of_month)
     , day_of_month_(day_of_month)
 {
     if (month < 1 || month > 12)
-        throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid month: {}", DB::toString(month));
+        throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid month: {}", month);
     /* We can't validate day_of_month here, because we don't know if
      * it's a leap year. */
 }
@@ -240,7 +240,7 @@ MonthDay::MonthDay(bool is_leap_year, uint16_t day_of_year)
 {
     if (day_of_year < 1 || day_of_year > (is_leap_year ? 366 : 365))
         throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid day of year: {}{}",
-                        (is_leap_year ? "leap, " : "non-leap, "), DB::toString(day_of_year));
+                        (is_leap_year ? "leap, " : "non-leap, "), day_of_year);
 
     month_ = 1;
     uint16_t d = day_of_year;
@@ -249,7 +249,7 @@ MonthDay::MonthDay(bool is_leap_year, uint16_t day_of_year)
         const auto len = gd::monthLength(is_leap_year, month_);
         if (d <= len)
             break;
-        month_++;
+        ++month_;
         d -= len;
     }
     day_of_month_ = d;
@@ -260,7 +260,7 @@ uint16_t MonthDay::dayOfYear(bool is_leap_year) const
     if (day_of_month_ < 1 || day_of_month_ > gd::monthLength(is_leap_year, month_))
     {
         throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid day of month: {}{}-{}",
-            (is_leap_year ? "leap, " : "non-leap, "), DB::toString(month_), DB::toString(day_of_month_));
+            (is_leap_year ? "leap, " : "non-leap, "), month_, day_of_month_);
     }
     const auto k = month_ <= 2 ? 0 : is_leap_year ? -1 :-2;
     return (367 * month_ - 362) / 12 + k + day_of_month_;

From 24b9c430f83b938329d228abd62ed44845fa63fc Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@clickhouse.com>
Date: Wed, 5 Jul 2023 00:39:10 +0200
Subject: [PATCH 046/242] Continuation

---
 src/Functions/GregorianDate.cpp         | 198 ++++++++++++++++++------
 src/Functions/GregorianDate.h           |  17 ++
 src/Functions/fromModifiedJulianDay.cpp |   6 +-
 src/Functions/toModifiedJulianDay.cpp   |  23 +--
 4 files changed, 178 insertions(+), 66 deletions(-)

diff --git a/src/Functions/GregorianDate.cpp b/src/Functions/GregorianDate.cpp
index 38ed3e2ddf8..da1172c8916 100644
--- a/src/Functions/GregorianDate.cpp
+++ b/src/Functions/GregorianDate.cpp
@@ -1,7 +1,6 @@
 #include <Functions/GregorianDate.h>
 
 #include <Common/Exception.h>
-#include <Core/Types.h>
 #include <IO/ReadBuffer.h>
 #include <IO/ReadHelpers.h>
 #include <IO/WriteBufferFromString.h>
@@ -19,7 +18,7 @@ namespace ErrorCodes
     extern const int LOGICAL_ERROR;
 }
 
-namespace gd
+namespace
 {
     static inline constexpr bool is_leap_year(int32_t year)
     {
@@ -93,49 +92,129 @@ namespace gd
         else
             return c - '0';
     }
+
+    /// Reads one character from `in`; if it is a decimal digit, stores its numeric value in `c` and returns true.
+    static inline bool tryReadDigit(ReadBuffer & in, char & c)
+    {
+        if (!in.read(c) || c < '0' || c > '9')
+            return false;
+
+        /// Convert the ASCII digit to its numeric value in place.
+        c -= '0';
+        return true;
+    }
+}
+
+void GregorianDate::init(ReadBuffer & in)
+{
+    /// Parses 'YYYY-MM-DD' up to the end of the buffer and validates month and day; throws on malformed input.
+    /// Digits are read in separate sequenced steps: operands of `+` may be evaluated in any order.
+    auto read_number = [&in](int digits)
+    {
+        int value = 0;
+        for (int i = 0; i < digits; ++i)
+            value = value * 10 + readDigit(in);
+        return value;
+    };
+
+    year_ = read_number(4);
+    assertChar('-', in);
+    month_ = read_number(2);
+    assertChar('-', in);
+    day_of_month_ = read_number(2);
+    assertEOF(in);
+
+    if (month_ < 1 || month_ > 12 || day_of_month_ < 1 || day_of_month_ > monthLength(is_leap_year(year_), month_))
+        throw Exception(ErrorCodes::CANNOT_PARSE_DATE, "Invalid date");
+}
+
+bool GregorianDate::tryInit(ReadBuffer & in)
+{
+    /// Digits of 'YYYY-MM-DD' in reading order; each slot holds a numeric value (0..9) once read.
+    char digits[8];
+    size_t pos = 0;
+
+    /// Consumes `count` consecutive digits into the next free slots; stops at the first failure.
+    auto read_group = [&](size_t count)
+    {
+        for (const size_t end = pos + count; pos < end; ++pos)
+            if (!tryReadDigit(in, digits[pos]))
+                return false;
+        return true;
+    };
+
+    const bool well_formed = read_group(4) && checkChar('-', in) && read_group(2) && checkChar('-', in) && read_group(2) && in.eof();
+    if (!well_formed)
+        return false;
+
+    year_ = digits[0] * 1000 + digits[1] * 100 + digits[2] * 10 + digits[3];
+    month_ = digits[4] * 10 + digits[5];
+    day_of_month_ = digits[6] * 10 + digits[7];
+
+    const bool in_range = month_ >= 1 && month_ <= 12 && day_of_month_ >= 1
+        && day_of_month_ <= monthLength(is_leap_year(year_), month_);
+
+    return in_range;
 }
 
 GregorianDate::GregorianDate(ReadBuffer & in)
 {
-    year_ = gd::readDigit(in) * 1000
-          + gd::readDigit(in) * 100
-          + gd::readDigit(in) * 10
-          + gd::readDigit(in);
+    init(in);
+}
 
-    assertChar('-', in);
+void GregorianDate::init(int64_t modified_julian_day)
+{
+    const OrdinalDate ord(modified_julian_day);
+    const MonthDay md(is_leap_year(ord.year()), ord.dayOfYear());
 
-    month_ = gd::readDigit(in) * 10
-           + gd::readDigit(in);
+    year_  = ord.year();
+    month_ = md.month();
+    day_of_month_ = md.dayOfMonth();
+}
 
-    assertChar('-', in);
+bool GregorianDate::tryInit(int64_t modified_julian_day)
+{
+    OrdinalDate ord;
+    if (!ord.tryInit(modified_julian_day))
+        return false;
 
-    day_of_month_ = gd::readDigit(in) * 10
-                + gd::readDigit(in);
+    MonthDay md(is_leap_year(ord.year()), ord.dayOfYear());
 
-    assertEOF(in);
+    year_  = ord.year();
+    month_ = md.month();
+    day_of_month_ = md.dayOfMonth();
 
-    if (month_ < 1 || month_ > 12 || day_of_month_ < 1 || day_of_month_ > gd::monthLength(gd::is_leap_year(year_), month_))
-        throw Exception(ErrorCodes::CANNOT_PARSE_DATE, "Invalid date");
+    return true;
 }
 
 GregorianDate::GregorianDate(int64_t modified_julian_day)
 {
-    const OrdinalDate ord(modified_julian_day);
-    const MonthDay md(gd::is_leap_year(ord.year()), ord.dayOfYear());
-
-    year_       = ord.year();
-    month_      = md.month();
-    day_of_month_ = md.dayOfMonth();
+    init(modified_julian_day);
 }
 
 int64_t GregorianDate::toModifiedJulianDay() const
 {
     const MonthDay md(month_, day_of_month_);
-    const auto day_of_year = md.dayOfYear(gd::is_leap_year(year_));
+
+    const auto day_of_year = md.dayOfYear(is_leap_year(year_));
+
     const OrdinalDate ord(year_, day_of_year);
     return ord.toModifiedJulianDay();
 }
 
+bool GregorianDate::tryToModifiedJulianDay(int64_t & res) const
+{
+    /// `res` is assigned only when the ordinal date is accepted; otherwise it is left untouched.
+    const auto ordinal_day = MonthDay(month_, day_of_month_).dayOfYear(is_leap_year(year_));
+    OrdinalDate ordinal;
+    if (ordinal.tryInit(year_, ordinal_day))
+    {
+        res = ordinal.toModifiedJulianDay();
+        return true;
+    }
+    return false;
+}
+
 template <typename ReturnType>
 ReturnType GregorianDate::writeImpl(WriteBuffer & buf) const
 {
@@ -178,51 +257,76 @@ std::string GregorianDate::toString() const
     return buf.str();
 }
 
-OrdinalDate::OrdinalDate(int32_t year, uint16_t day_of_year)
-    : year_(year)
-    , day_of_year_(day_of_year)
+void OrdinalDate::init(int32_t year, uint16_t day_of_year)
 {
-    if (day_of_year < 1 || day_of_year > (gd::is_leap_year(year) ? 366 : 365))
-    {
-        throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid ordinal date: {}-{}", toString(year), toString(day_of_year));
-    }
+    year_ = year;
+    day_of_year_ = day_of_year;
+
+    if (day_of_year < 1 || day_of_year > (is_leap_year(year) ? 366 : 365))
+        throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid ordinal date: {}-{}", year, day_of_year);
 }
 
-OrdinalDate::OrdinalDate(int64_t modified_julian_day)
+bool OrdinalDate::tryInit(int32_t year, uint16_t day_of_year)
+{
+    /// Both fields are stored unconditionally; the result tells whether that day exists in the given year.
+    year_ = year;
+    day_of_year_ = day_of_year;
+    return day_of_year >= 1 && day_of_year <= (is_leap_year(year) ? 366 : 365);
+}
+
+void OrdinalDate::init(int64_t modified_julian_day)
+{
+    /// Throwing counterpart of tryInit(): a rejected day number is reported as an exception.
+    if (tryInit(modified_julian_day))
+        return;
+    throw Exception(ErrorCodes::CANNOT_FORMAT_DATETIME, "Value cannot be represented as date because it's out of range");
+}
+
+bool OrdinalDate::tryInit(int64_t modified_julian_day)
 {
     /// This function supports day number from -678941 to 2973119 (which represent 0000-01-01 and 9999-12-31 respectively).
 
     if (modified_julian_day < -678941)
-        throw Exception(
-            ErrorCodes::CANNOT_FORMAT_DATETIME,
-            "Value cannot be represented as date because it's out of range");
+        return false;
 
     if (modified_julian_day > 2973119)
-        throw Exception(
-            ErrorCodes::CANNOT_FORMAT_DATETIME,
-            "Value cannot be represented as date because it's out of range");
+        return false;
 
     const auto a         = modified_julian_day + 678575;
-    const auto quad_cent = gd::div(a, 146097);
-    const auto b         = gd::mod(a, 146097);
-    const auto cent      = gd::min(gd::div(b, 36524), 3);
+    const auto quad_cent = div(a, 146097);
+    const auto b         = mod(a, 146097);
+    const auto cent      = min(div(b, 36524), 3);
     const auto c         = b - cent * 36524;
-    const auto quad      = gd::div(c, 1461);
-    const auto d         = gd::mod(c, 1461);
-    const auto y         = gd::min(gd::div(d, 365), 3);
+    const auto quad      = div(c, 1461);
+    const auto d         = mod(c, 1461);
+    const auto y         = min(div(d, 365), 3);
 
     day_of_year_ = d - y * 365 + 1;
     year_ = static_cast<int32_t>(quad_cent * 400 + cent * 100 + quad * 4 + y + 1);
+
+    return true;
+}
+
+
+OrdinalDate::OrdinalDate(int32_t year, uint16_t day_of_year)
+{
+    init(year, day_of_year);
+}
+
+OrdinalDate::OrdinalDate(int64_t modified_julian_day)
+{
+    init(modified_julian_day);
 }
 
 int64_t OrdinalDate::toModifiedJulianDay() const noexcept
 {
     const auto y = year_ - 1;
+
     return day_of_year_
         + 365 * y
-        + gd::div(y, 4)
-        - gd::div(y, 100)
-        + gd::div(y, 400)
+        + div(y, 4)
+        - div(y, 100)
+        + div(y, 400)
         - 678576;
 }
 
@@ -246,7 +350,7 @@ MonthDay::MonthDay(bool is_leap_year, uint16_t day_of_year)
     uint16_t d = day_of_year;
     while (true)
     {
-        const auto len = gd::monthLength(is_leap_year, month_);
+        const auto len = monthLength(is_leap_year, month_);
         if (d <= len)
             break;
         ++month_;
@@ -257,7 +361,7 @@ MonthDay::MonthDay(bool is_leap_year, uint16_t day_of_year)
 
 uint16_t MonthDay::dayOfYear(bool is_leap_year) const
 {
-    if (day_of_month_ < 1 || day_of_month_ > gd::monthLength(is_leap_year, month_))
+    if (day_of_month_ < 1 || day_of_month_ > monthLength(is_leap_year, month_))
     {
         throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid day of month: {}{}-{}",
             (is_leap_year ? "leap, " : "non-leap, "), month_, day_of_month_);
diff --git a/src/Functions/GregorianDate.h b/src/Functions/GregorianDate.h
index 4a0cbec5afe..2528223443e 100644
--- a/src/Functions/GregorianDate.h
+++ b/src/Functions/GregorianDate.h
@@ -13,11 +13,19 @@ class WriteBuffer;
 class GregorianDate
 {
 public:
+    GregorianDate() {}
+
+    void init(ReadBuffer & in);
+    bool tryInit(ReadBuffer & in);
+
     /** Construct from date in text form 'YYYY-MM-DD' by reading from
       * ReadBuffer.
       */
     explicit GregorianDate(ReadBuffer & in);
 
+    void init(int64_t modified_julian_day);
+    bool tryInit(int64_t modified_julian_day);
+
     /** Construct from Modified Julian Day. The type T is an
       * integral type which should be at least 32 bits wide, and
       * should preferably signed.
@@ -29,6 +37,7 @@ public:
       * signed.
       */
     int64_t toModifiedJulianDay() const;
+    bool tryToModifiedJulianDay(int64_t & res) const;
 
     /** Write the date in text form 'YYYY-MM-DD' to a buffer.
       */
@@ -75,6 +84,14 @@ private:
 class OrdinalDate
 {
 public:
+    OrdinalDate() {}
+
+    void init(int32_t year, uint16_t day_of_year);
+    bool tryInit(int32_t year, uint16_t day_of_year);
+
+    void init(int64_t modified_julian_day);
+    bool tryInit(int64_t modified_julian_day);
+
     OrdinalDate(int32_t year, uint16_t day_of_year);
 
     /** Construct from Modified Julian Day. The type T is an
diff --git a/src/Functions/fromModifiedJulianDay.cpp b/src/Functions/fromModifiedJulianDay.cpp
index bad0696e503..8736b1fce7f 100644
--- a/src/Functions/fromModifiedJulianDay.cpp
+++ b/src/Functions/fromModifiedJulianDay.cpp
@@ -57,14 +57,14 @@ namespace DB
             {
                 if constexpr (nullOnErrors)
                 {
-                    const GregorianDate gd(vec_from[i]);
-                    (*vec_null_map_to)[i] = gd.tryWrite(write_buffer);
+                    GregorianDate gd;
+                    (*vec_null_map_to)[i] = !(gd.tryInit(vec_from[i]) && gd.tryWrite(write_buffer));
                     writeChar(0, write_buffer);
                     offsets_to[i] = write_buffer.count();
                 }
                 else
                 {
-                    const GregorianDate gd(vec_from[i]);
+                    GregorianDate gd(vec_from[i]);
                     gd.write(write_buffer);
                     writeChar(0, write_buffer);
                     offsets_to[i] = write_buffer.count();
diff --git a/src/Functions/toModifiedJulianDay.cpp b/src/Functions/toModifiedJulianDay.cpp
index f800b279385..5b4cd34141c 100644
--- a/src/Functions/toModifiedJulianDay.cpp
+++ b/src/Functions/toModifiedJulianDay.cpp
@@ -78,22 +78,13 @@ namespace DB
 
                 if constexpr (nullOnErrors)
                 {
-                    try
-                    {
-                        const GregorianDate date(read_buffer);
-                        vec_to[i] = static_cast<typename ToDataType::FieldType>(date.toModifiedJulianDay());
-                        vec_null_map_to[i] = false;
-                    }
-                    catch (const Exception & e)
-                    {
-                        if (e.code() == ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED || e.code() == ErrorCodes::CANNOT_PARSE_DATE)
-                        {
-                            vec_to[i] = static_cast<Int32>(0);
-                            vec_null_map_to[i] = true;
-                        }
-                        else
-                            throw;
-                    }
+                    GregorianDate date;
+
+                    int64_t res = 0;
+                    bool success = date.tryInit(read_buffer) && date.tryToModifiedJulianDay(res);
+
+                    vec_to[i] = static_cast<typename ToDataType::FieldType>(res);
+                    vec_null_map_to[i] = !success;
                 }
                 else
                 {

From b3edfbaab63af0b2168ce5d68ce63264e2093de4 Mon Sep 17 00:00:00 2001
From: Robert Schulze <robert@clickhouse.com>
Date: Tue, 4 Jul 2023 21:10:25 +0000
Subject: [PATCH 047/242] Update docs

---
 docs/en/sql-reference/statements/show.md | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/docs/en/sql-reference/statements/show.md b/docs/en/sql-reference/statements/show.md
index b5bacef7b1f..1a1e4dbd2c7 100644
--- a/docs/en/sql-reference/statements/show.md
+++ b/docs/en/sql-reference/statements/show.md
@@ -289,8 +289,8 @@ The statement produces a result table with the following structure:
 - table - The name of the table. (String)
 - non_unique - Always `1` as ClickHouse does not support uniqueness constraints. (UInt8)
 - key_name - The name of the index, `PRIMARY` if the index is a primary key index. (String)
-- seq_in_index - Currently always `1`. (In MySQL, this field denotes the position of the column in a non-functional index.) (UInt8)
-- column_name - Currently always `` (empty string), also see field `expression`. (In MySQL, this field denotes the name of the column in a non-functional index.) (String)
+- column_name - For a primary key index, the name of the column. For a data skipping index: '' (empty string), see field "expression". (String)
+- seq_in_index - For a primary key index, the position of the column starting from `1`. For a data skipping index: always `1`. (UInt8)
 - collation - The sorting of the column in the index: `A` if ascending, `D` if descending, `NULL` if unsorted. (Nullable(String))
 - cardinality - An estimation of the index cardinality (number of unique values in the index). Currently always 0. (UInt64)
 - sub_part - Always `NULL` because ClickHouse does not support index prefixes like MySQL. (Nullable(String))
@@ -300,7 +300,7 @@ The statement produces a result table with the following structure:
 - comment - Additional information about the index, currently always `` (empty string). (String)
 - index_comment - `` (empty string) because indexes in ClickHouse cannot have a `COMMENT` field (like in MySQL). (String)
 - visible - If the index is visible to the optimizer, always `YES`. (String)
-- expression - The index expression. (In MySQL this field is only used for functional-indexes.) (String)
+- expression - For a data skipping index, the index expression. For a primary key index: '' (empty string). (String)
 
 **Examples**
 
@@ -313,13 +313,14 @@ SHOW INDEX FROM 'tbl'
 Result:
 
 ``` text
-┌─table─┬─non_unique─┬─key_name─┬─seq_in_index─┬─column_name─┬─collation─┬─cardinality─┬─sub_part─┬─packed─┬─null─┬─index_type───┬─comment─┬─index_comment─┬─visible─┬─expression─┐
-│ tbl   │          1 │ blf_idx  │ 1            │             │ ᴺᵁᴸᴸ      │ 0           │ ᴺᵁᴸᴸ     │ ᴺᵁᴸᴸ   │ ᴺᵁᴸᴸ │ BLOOM_FILTER │         │               │ YES     │ d, b       │
-│ tbl   │          1 │ mm1_idx  │ 1            │             │ ᴺᵁᴸᴸ      │ 0           │ ᴺᵁᴸᴸ     │ ᴺᵁᴸᴸ   │ ᴺᵁᴸᴸ │ MINMAX       │         │               │ YES     │ a, c, d    │
-│ tbl   │          1 │ mm2_idx  │ 1            │             │ ᴺᵁᴸᴸ      │ 0           │ ᴺᵁᴸᴸ     │ ᴺᵁᴸᴸ   │ ᴺᵁᴸᴸ │ MINMAX       │         │               │ YES     │ c, d, e    │
-│ tbl   │          1 │ PRIMARY  │ 1            │             │ A         │ 0           │ ᴺᵁᴸᴸ     │ ᴺᵁᴸᴸ   │ ᴺᵁᴸᴸ │ PRIMARY      │         │               │ YES     │ c, a       │
-│ tbl   │          1 │ set_idx  │ 1            │             │ ᴺᵁᴸᴸ      │ 0           │ ᴺᵁᴸᴸ     │ ᴺᵁᴸᴸ   │ ᴺᵁᴸᴸ │ SET          │         │               │ YES     │ e          │
-└───────┴────────────┴──────────┴──────────────┴─────────────┴───────────┴─────────────┴──────────┴────────┴──────┴──────────────┴─────────┴───────────────┴─────────┴────────────┘
+┌─table─┬─non_unique─┬─key_name─┬─column_name─┬─seq_in_index─┬─collation─┬─cardinality─┬─sub_part─┬─packed─┬─null─┬─index_type───┬─comment─┬─index_comment─┬─visible─┬─expression─┐
+│ tbl   │          1 │ blf_idx  │             │ 1            │ ᴺᵁᴸᴸ      │ 0           │ ᴺᵁᴸᴸ     │ ᴺᵁᴸᴸ   │ ᴺᵁᴸᴸ │ BLOOM_FILTER │         │               │ YES     │ d, b       │
+│ tbl   │          1 │ mm1_idx  │             │ 1            │ ᴺᵁᴸᴸ      │ 0           │ ᴺᵁᴸᴸ     │ ᴺᵁᴸᴸ   │ ᴺᵁᴸᴸ │ MINMAX       │         │               │ YES     │ a, c, d    │
+│ tbl   │          1 │ mm2_idx  │             │ 1            │ ᴺᵁᴸᴸ      │ 0           │ ᴺᵁᴸᴸ     │ ᴺᵁᴸᴸ   │ ᴺᵁᴸᴸ │ MINMAX       │         │               │ YES     │ c, d, e    │
+│ tbl   │          1 │ PRIMARY  │ c           │ 1            │ A         │ 0           │ ᴺᵁᴸᴸ     │ ᴺᵁᴸᴸ   │ ᴺᵁᴸᴸ │ PRIMARY      │         │               │ YES     │            │
+│ tbl   │          1 │ PRIMARY  │ a           │ 2            │ A         │ 0           │ ᴺᵁᴸᴸ     │ ᴺᵁᴸᴸ   │ ᴺᵁᴸᴸ │ PRIMARY      │         │               │ YES     │            │
+│ tbl   │          1 │ set_idx  │             │ 1            │ ᴺᵁᴸᴸ      │ 0           │ ᴺᵁᴸᴸ     │ ᴺᵁᴸᴸ   │ ᴺᵁᴸᴸ │ SET          │         │               │ YES     │ e          │
+└───────┴────────────┴──────────┴─────────────┴──────────────┴───────────┴─────────────┴──────────┴────────┴──────┴──────────────┴─────────┴───────────────┴─────────┴────────────┘
 ```
 
 **See also**

From e3796e30546a8a56ba06d76ae57317b5fc1abd7c Mon Sep 17 00:00:00 2001
From: Robert Schulze <robert@clickhouse.com>
Date: Wed, 5 Jul 2023 09:01:09 +0000
Subject: [PATCH 048/242] Update ORDER BY for more stable test results

---
 src/Interpreters/InterpreterShowIndexesQuery.cpp       | 2 +-
 tests/queries/0_stateless/02724_show_indexes.reference | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/Interpreters/InterpreterShowIndexesQuery.cpp b/src/Interpreters/InterpreterShowIndexesQuery.cpp
index 2f65cc3ec3a..35f32a79310 100644
--- a/src/Interpreters/InterpreterShowIndexesQuery.cpp
+++ b/src/Interpreters/InterpreterShowIndexesQuery.cpp
@@ -80,7 +80,7 @@ FROM (
             database = '{0}'
             AND table = '{1}'))
 {2}
-ORDER BY index_type, expression;)", database, table, where_expression);
+ORDER BY index_type, expression, column_name, seq_in_index;)", database, table, where_expression);
 
     /// Sorting is strictly speaking not necessary but 1. it is convenient for users, 2. SQL currently does not allow to
     /// sort the output of SHOW INDEXES otherwise (SELECT * FROM (SHOW INDEXES ...) ORDER BY ...) is rejected) and 3. some
diff --git a/tests/queries/0_stateless/02724_show_indexes.reference b/tests/queries/0_stateless/02724_show_indexes.reference
index 063105e3332..cee0598d625 100644
--- a/tests/queries/0_stateless/02724_show_indexes.reference
+++ b/tests/queries/0_stateless/02724_show_indexes.reference
@@ -20,8 +20,8 @@ tbl	1	set_idx		1	\N	0	\N	\N	\N	SET			YES	e
 tbl	1	blf_idx		1	\N	0	\N	\N	\N	BLOOM_FILTER			YES	d, b
 tbl	1	mm1_idx		1	\N	0	\N	\N	\N	MINMAX			YES	a, c, d
 tbl	1	mm2_idx		1	\N	0	\N	\N	\N	MINMAX			YES	c, d, e
-tbl	1	PRIMARY	c	2	A	0	\N	\N	\N	PRIMARY			YES	
 tbl	1	PRIMARY	a	1	A	0	\N	\N	\N	PRIMARY			YES	
+tbl	1	PRIMARY	c	2	A	0	\N	\N	\N	PRIMARY			YES	
 tbl	1	set_idx		1	\N	0	\N	\N	\N	SET			YES	e
 --- EXTENDED
 tbl	1	blf_idx		1	\N	0	\N	\N	\N	BLOOM_FILTER			YES	d, b
@@ -40,8 +40,8 @@ NULL	1	PRIMARY	c	1	A	0	\N	\N	\N	PRIMARY			YES
 tbl	1	blf_idx		1	\N	0	\N	\N	\N	BLOOM_FILTER			YES	d, b
 tbl	1	mm1_idx		1	\N	0	\N	\N	\N	MINMAX			YES	a, c, d
 tbl	1	mm2_idx		1	\N	0	\N	\N	\N	MINMAX			YES	c, d, e
-tbl	1	PRIMARY	c	2	A	0	\N	\N	\N	PRIMARY			YES	
 tbl	1	PRIMARY	a	1	A	0	\N	\N	\N	PRIMARY			YES	
+tbl	1	PRIMARY	c	2	A	0	\N	\N	\N	PRIMARY			YES	
 tbl	1	set_idx		1	\N	0	\N	\N	\N	SET			YES	e
 --- Equally named table in other database
 tbl	1	mmi_idx		1	\N	0	\N	\N	\N	MINMAX			YES	b

From 45db928e4e31aae6a6d7e8e6b35e0a5a3768375c Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@clickhouse.com>
Date: Thu, 6 Jul 2023 02:52:55 +0200
Subject: [PATCH 049/242] Fix style

---
 src/Functions/fromModifiedJulianDay.cpp | 1 -
 src/Functions/toModifiedJulianDay.cpp   | 2 --
 2 files changed, 3 deletions(-)

diff --git a/src/Functions/fromModifiedJulianDay.cpp b/src/Functions/fromModifiedJulianDay.cpp
index 8736b1fce7f..695d1b7d63c 100644
--- a/src/Functions/fromModifiedJulianDay.cpp
+++ b/src/Functions/fromModifiedJulianDay.cpp
@@ -19,7 +19,6 @@ namespace DB
 
     namespace ErrorCodes
     {
-        extern const int CANNOT_FORMAT_DATETIME;
         extern const int ILLEGAL_TYPE_OF_ARGUMENT;
     }
 
diff --git a/src/Functions/toModifiedJulianDay.cpp b/src/Functions/toModifiedJulianDay.cpp
index 5b4cd34141c..907c7570ce2 100644
--- a/src/Functions/toModifiedJulianDay.cpp
+++ b/src/Functions/toModifiedJulianDay.cpp
@@ -17,8 +17,6 @@ namespace DB
     {
         extern const int ILLEGAL_COLUMN;
         extern const int ILLEGAL_TYPE_OF_ARGUMENT;
-        extern const int CANNOT_PARSE_INPUT_ASSERTION_FAILED;
-        extern const int CANNOT_PARSE_DATE;
     }
 
     template <typename Name, typename ToDataType, bool nullOnErrors>

From dee71d2e2f8cdd6be4a82f26e7af9b8a75453091 Mon Sep 17 00:00:00 2001
From: Dmitry Kardymon <d.kardymon@arenadata.io>
Date: Thu, 6 Jul 2023 13:16:31 +0000
Subject: [PATCH 050/242] Add first version of hasSubsequence()

---
 src/Functions/HasSubsequenceImpl.h            | 131 ++++++++++++++++++
 src/Functions/hasSubsequence.cpp              |  29 ++++
 .../hasSubsequenceCaseInsensitive.cpp         |  28 ++++
 src/Functions/like.cpp                        |   1 -
 .../02809_has_subsequence.reference           |  16 +++
 .../0_stateless/02809_has_subsequence.sql     |  19 +++
 6 files changed, 223 insertions(+), 1 deletion(-)
 create mode 100644 src/Functions/HasSubsequenceImpl.h
 create mode 100644 src/Functions/hasSubsequence.cpp
 create mode 100644 src/Functions/hasSubsequenceCaseInsensitive.cpp
 create mode 100644 tests/queries/0_stateless/02809_has_subsequence.reference
 create mode 100644 tests/queries/0_stateless/02809_has_subsequence.sql

diff --git a/src/Functions/HasSubsequenceImpl.h b/src/Functions/HasSubsequenceImpl.h
new file mode 100644
index 00000000000..3a29ef68b0b
--- /dev/null
+++ b/src/Functions/HasSubsequenceImpl.h
@@ -0,0 +1,179 @@
+#pragma once
+
+
+namespace DB
+{
+namespace
+{
+
+/** Implementation of hasSubsequence-like functions, used as the Impl of FunctionsStringSearch.
+  *
+  * The needle matches when all of its bytes occur in the haystack in the same relative
+  * order, not necessarily contiguously (e.g. 'arg' is a subsequence of 'garbage').
+  *
+  * `Name` supplies the function name; `Impl::toLowerIfNeed(std::string &)` normalizes the
+  * case of a string in place before comparison (it may do nothing for case-sensitive search).
+  */
+template <typename Name, typename Impl>
+struct HasSubsequenceImpl
+{
+    using ResultType = UInt8;
+
+    static constexpr bool use_default_implementation_for_constants = false;
+    static constexpr bool supports_start_pos = false;
+    static constexpr auto name = Name::name;
+
+    static ColumnNumbers getArgumentsThatAreAlwaysConstant() { return {};}
+
+    /// Check one constant needle against every haystack row; `res` must already hold one element per row.
+    static void vectorConstant(
+        const ColumnString::Chars & haystack_data,
+        const ColumnString::Offsets & haystack_offsets,
+        const std::string & needle,
+        const ColumnPtr & /*start_pos*/,
+        PaddedPODArray<UInt8> & res,
+        [[maybe_unused]] ColumnUInt8 * /*res_null*/)
+    {
+        std::string normalized_needle = needle;
+        Impl::toLowerIfNeed(normalized_needle);
+
+        /// Reused across rows so that the buffer keeps its capacity between iterations.
+        std::string normalized_haystack;
+        ColumnString::Offset prev_haystack_offset = 0;
+
+        const size_t size = haystack_offsets.size();
+        for (size_t i = 0; i < size; ++i)
+        {
+            /// As in vectorVector: the offsets count one extra trailing byte per row that is not part of the string.
+            const size_t haystack_size = haystack_offsets[i] - prev_haystack_offset - 1;
+
+            normalized_haystack.assign(reinterpret_cast<const char *>(&haystack_data[prev_haystack_offset]), haystack_size);
+            Impl::toLowerIfNeed(normalized_haystack);
+
+            res[i] = impl(normalized_haystack.data(), normalized_haystack.size(), normalized_needle.data(), normalized_needle.size());
+            prev_haystack_offset = haystack_offsets[i];
+        }
+    }
+
+    /// Check the needle of each row against the haystack of the same row; `res` must already hold one element per row.
+    static void vectorVector(
+        const ColumnString::Chars & haystack_data,
+        const ColumnString::Offsets & haystack_offsets,
+        const ColumnString::Chars & needle_data,
+        const ColumnString::Offsets & needle_offsets,
+        const ColumnPtr & /*start_pos*/,
+        PaddedPODArray<UInt8> & res,
+        ColumnUInt8 * /*res_null*/)
+    {
+        /// Reused across rows so that the buffers keep their capacity between iterations.
+        std::string normalized_haystack;
+        std::string normalized_needle;
+
+        ColumnString::Offset prev_haystack_offset = 0;
+        ColumnString::Offset prev_needle_offset = 0;
+
+        const size_t size = haystack_offsets.size();
+
+        for (size_t i = 0; i < size; ++i)
+        {
+            const size_t needle_size = needle_offsets[i] - prev_needle_offset - 1;
+            const size_t haystack_size = haystack_offsets[i] - prev_haystack_offset - 1;
+
+            if (0 == needle_size)
+            {
+                /// The empty needle is a subsequence of any haystack.
+                res[i] = 1;
+            }
+            else
+            {
+                normalized_needle.assign(reinterpret_cast<const char *>(&needle_data[prev_needle_offset]), needle_size);
+                normalized_haystack.assign(reinterpret_cast<const char *>(&haystack_data[prev_haystack_offset]), haystack_size);
+                Impl::toLowerIfNeed(normalized_needle);
+                Impl::toLowerIfNeed(normalized_haystack);
+
+                res[i] = impl(normalized_haystack.data(), normalized_haystack.size(), normalized_needle.data(), normalized_needle.size());
+            }
+
+            prev_haystack_offset = haystack_offsets[i];
+            prev_needle_offset = needle_offsets[i];
+        }
+    }
+
+    /// Check every needle row against one constant haystack; `res` must already hold one element per row.
+    static void constantVector(
+        const String & haystack,
+        const ColumnString::Chars & needle_data,
+        const ColumnString::Offsets & needle_offsets,
+        const ColumnPtr & /*start_pos*/,
+        PaddedPODArray<UInt8> & res,
+        ColumnUInt8 * /*res_null*/)
+    {
+        std::string normalized_haystack = haystack;
+        Impl::toLowerIfNeed(normalized_haystack);
+
+        /// Reused across rows so that the buffer keeps its capacity between iterations.
+        std::string normalized_needle;
+        ColumnString::Offset prev_needle_offset = 0;
+
+        const size_t size = needle_offsets.size();
+        for (size_t i = 0; i < size; ++i)
+        {
+            const size_t needle_size = needle_offsets[i] - prev_needle_offset - 1;
+
+            normalized_needle.assign(reinterpret_cast<const char *>(&needle_data[prev_needle_offset]), needle_size);
+            Impl::toLowerIfNeed(normalized_needle);
+
+            res[i] = impl(normalized_haystack.data(), normalized_haystack.size(), normalized_needle.data(), normalized_needle.size());
+            prev_needle_offset = needle_offsets[i];
+        }
+    }
+
+    /// Greedy single pass over the haystack: advance in the needle each time its next byte is met.
+    /// Returns 1 if the whole needle was consumed (always the case for an empty needle), 0 otherwise.
+    static UInt8 impl(const char * haystack, size_t haystack_size, const char * needle, size_t needle_size)
+    {
+        size_t j = 0;
+        for (size_t i = 0; (i < haystack_size) && (j < needle_size); ++i)
+            if (needle[j] == haystack[i])
+                ++j;
+        return j == needle_size;
+    }
+
+    /// Both arguments are constants: compute the answer once and copy it into every element of `res`.
+    static void constantConstant(
+        std::string haystack,
+        std::string needle,
+        const ColumnPtr & /*start_pos*/,
+        PaddedPODArray<UInt8> & res,
+        ColumnUInt8 * /*res_null*/)
+    {
+        size_t size = res.size();
+        Impl::toLowerIfNeed(haystack);
+        Impl::toLowerIfNeed(needle);
+
+        UInt8 result = impl(haystack.c_str(), haystack.size(), needle.c_str(), needle.size());
+
+        for (size_t i = 0; i < size; ++i)
+        {
+            res[i] = result;
+        }
+    }
+
+    /// FixedString haystacks are rejected with ILLEGAL_COLUMN.
+    template <typename... Args>
+    static void vectorFixedConstant(Args &&...)
+    {
+        throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Function '{}' doesn't support FixedString haystack argument", name);
+    }
+
+    /// FixedString haystacks are rejected with ILLEGAL_COLUMN.
+    template <typename... Args>
+    static void vectorFixedVector(Args &&...)
+    {
+        throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Function '{}' doesn't support FixedString haystack argument", name);
+    }
+};
+
+}
+
+}
diff --git a/src/Functions/hasSubsequence.cpp b/src/Functions/hasSubsequence.cpp
new file mode 100644
index 00000000000..da2aaddcf50
--- /dev/null
+++ b/src/Functions/hasSubsequence.cpp
@@ -0,0 +1,29 @@
+#include <Functions/FunctionFactory.h>
+#include <Functions/FunctionsStringSearch.h>
+#include <Functions/HasSubsequenceImpl.h>
+
+
+namespace DB
+{
+namespace
+{
+
+struct HasSubsequenceCaseSensitiveASCII
+{
+    static void toLowerIfNeed(std::string & /*s*/) { }
+};
+
+struct NameHasSubsequence
+{
+    static constexpr auto name = "hasSubsequence";
+};
+
+using FunctionHasSubsequence = FunctionsStringSearch<HasSubsequenceImpl<NameHasSubsequence, HasSubsequenceCaseSensitiveASCII>>;
+}
+
+REGISTER_FUNCTION(hasSubsequence)
+{
+    factory.registerFunction<FunctionHasSubsequence>({}, FunctionFactory::CaseInsensitive);
+}
+
+}
diff --git a/src/Functions/hasSubsequenceCaseInsensitive.cpp b/src/Functions/hasSubsequenceCaseInsensitive.cpp
new file mode 100644
index 00000000000..f5c13a7cf8c
--- /dev/null
+++ b/src/Functions/hasSubsequenceCaseInsensitive.cpp
@@ -0,0 +1,34 @@
+#include <Functions/FunctionFactory.h>
+#include <Functions/FunctionsStringSearch.h>
+#include <Functions/HasSubsequenceImpl.h>
+
+namespace DB
+{
+namespace
+{
+
+/// Case normalization used by hasSubsequenceCaseInsensitive: lowercases the string in place, byte by byte.
+struct HasSubsequenceCaseInsensitiveASCII
+{
+    static void toLowerIfNeed(std::string & s)
+    {
+        /// tolower() has undefined behavior for negative arguments other than EOF, so every byte is
+        /// passed as unsigned char; this matters for bytes >= 0x80 on platforms where char is signed.
+        std::transform(std::begin(s), std::end(s), std::begin(s), [](unsigned char c) { return static_cast<char>(tolower(c)); });
+    }
+};
+
+struct NameHasSubsequenceCaseInsensitive
+{
+    static constexpr auto name = "hasSubsequenceCaseInsensitive";
+};
+
+using FunctionHasSubsequenceCaseInsensitive = FunctionsStringSearch<HasSubsequenceImpl<NameHasSubsequenceCaseInsensitive, HasSubsequenceCaseInsensitiveASCII>>;
+}
+
+REGISTER_FUNCTION(hasSubsequenceCaseInsensitive)
+{
+    factory.registerFunction<FunctionHasSubsequenceCaseInsensitive>({}, FunctionFactory::CaseInsensitive);
+}
+
+}
diff --git a/src/Functions/like.cpp b/src/Functions/like.cpp
index 3a3345051d4..5a86e37a92d 100644
--- a/src/Functions/like.cpp
+++ b/src/Functions/like.cpp
@@ -1,4 +1,3 @@
-#include "FunctionsStringSearch.h"
 #include "FunctionFactory.h"
 #include "like.h"
 
diff --git a/tests/queries/0_stateless/02809_has_subsequence.reference b/tests/queries/0_stateless/02809_has_subsequence.reference
new file mode 100644
index 00000000000..827caa105d0
--- /dev/null
+++ b/tests/queries/0_stateless/02809_has_subsequence.reference
@@ -0,0 +1,16 @@
+1
+1
+1
+1
+1
+1
+1
+1
+1
+0
+0
+0
+1
+1
+1
+0
\ No newline at end of file
diff --git a/tests/queries/0_stateless/02809_has_subsequence.sql b/tests/queries/0_stateless/02809_has_subsequence.sql
new file mode 100644
index 00000000000..63ffb49dc54
--- /dev/null
+++ b/tests/queries/0_stateless/02809_has_subsequence.sql
@@ -0,0 +1,19 @@
+select hasSubsequence('garbage', '');
+select hasSubsequence('garbage', 'g');
+select hasSubsequence('garbage', 'a');
+select hasSubsequence('garbage', 'e');
+select hasSubsequence('garbage', 'gr');
+select hasSubsequence('garbage', 'ab');
+select hasSubsequence('garbage', 'be');
+select hasSubsequence('garbage', 'arg');
+select hasSubsequence('garbage', 'garbage');
+
+select hasSubsequence('garbage', 'garbage1');
+select hasSubsequence('garbage', 'arbw');
+select hasSubsequence('garbage', 'ARG');
+
+select hasSubsequenceCaseInsensitive('garbage', 'ARG');
+
+select hasSubsequence(materialize('garbage'), materialize(''));
+select hasSubsequence(materialize('garbage'), materialize('arg'));
+select hasSubsequence(materialize('garbage'), materialize('garbage1'));
\ No newline at end of file

From 67e2dee7e2ea926d6a0a6ab35b31b2515f518426 Mon Sep 17 00:00:00 2001
From: Nikolay Degterinsky <evillique@gmail.com>
Date: Thu, 6 Jul 2023 14:29:58 +0000
Subject: [PATCH 051/242] Allow SETTINGS before FORMAT in DESCRIBE TABLE query

---
 src/Parsers/ParserDescribeTableQuery.cpp      | 20 +++++++++++++++----
 src/Parsers/ParserQueryWithOutput.cpp         |  2 +-
 src/Parsers/ParserTablePropertiesQuery.cpp    |  2 --
 src/Storages/StorageDistributed.cpp           |  1 -
 src/Storages/getStructureOfRemoteTable.cpp    |  1 -
 .../02789_describe_table_settings.reference   | 10 ++++++++++
 .../02789_describe_table_settings.sql         |  3 +++
 7 files changed, 30 insertions(+), 9 deletions(-)
 create mode 100644 tests/queries/0_stateless/02789_describe_table_settings.reference
 create mode 100644 tests/queries/0_stateless/02789_describe_table_settings.sql

diff --git a/src/Parsers/ParserDescribeTableQuery.cpp b/src/Parsers/ParserDescribeTableQuery.cpp
index ad6d2c5bcc6..fcfc4799dbe 100644
--- a/src/Parsers/ParserDescribeTableQuery.cpp
+++ b/src/Parsers/ParserDescribeTableQuery.cpp
@@ -3,6 +3,7 @@
 #include <Parsers/CommonParsers.h>
 #include <Parsers/ParserDescribeTableQuery.h>
 #include <Parsers/ParserTablesInSelectQuery.h>
+#include <Parsers/ParserSetQuery.h>
 
 #include <Common/typeid_cast.h>
 
@@ -16,8 +17,10 @@ bool ParserDescribeTableQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & ex
     ParserKeyword s_describe("DESCRIBE");
     ParserKeyword s_desc("DESC");
     ParserKeyword s_table("TABLE");
+    ParserKeyword s_settings("SETTINGS");
     ParserToken s_dot(TokenType::Dot);
     ParserIdentifier name_p;
+    ParserSetQuery parser_settings(true);
 
     ASTPtr database;
     ASTPtr table;
@@ -29,12 +32,21 @@ bool ParserDescribeTableQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & ex
 
     s_table.ignore(pos, expected);
 
-    ASTPtr table_expression;
-    if (!ParserTableExpression().parse(pos, table_expression, expected))
+    if (!ParserTableExpression().parse(pos, query->table_expression, expected))
         return false;
 
-    query->children.push_back(std::move(table_expression));
-    query->table_expression = query->children.back();
+    /// For compatibility with SELECTs, where SETTINGS can be in front of FORMAT
+    ASTPtr settings;
+    if (s_settings.ignore(pos, expected))
+    {
+        if (!parser_settings.parse(pos, query->settings_ast, expected))
+            return false;
+    }
+
+    query->children.push_back(query->table_expression);
+
+    if (query->settings_ast)
+        query->children.push_back(query->settings_ast);
 
     node = query;
 
diff --git a/src/Parsers/ParserQueryWithOutput.cpp b/src/Parsers/ParserQueryWithOutput.cpp
index 6796f4528c4..5dc713ca8c6 100644
--- a/src/Parsers/ParserQueryWithOutput.cpp
+++ b/src/Parsers/ParserQueryWithOutput.cpp
@@ -150,7 +150,7 @@ bool ParserQueryWithOutput::parseImpl(Pos & pos, ASTPtr & node, Expected & expec
 
     // SETTINGS key1 = value1, key2 = value2, ...
     ParserKeyword s_settings("SETTINGS");
-    if (s_settings.ignore(pos, expected))
+    if (!query_with_output.settings_ast && s_settings.ignore(pos, expected))
     {
         ParserSetQuery parser_settings(true);
         if (!parser_settings.parse(pos, query_with_output.settings_ast, expected))
diff --git a/src/Parsers/ParserTablePropertiesQuery.cpp b/src/Parsers/ParserTablePropertiesQuery.cpp
index b73ce8de359..94f264fcc89 100644
--- a/src/Parsers/ParserTablePropertiesQuery.cpp
+++ b/src/Parsers/ParserTablePropertiesQuery.cpp
@@ -14,8 +14,6 @@ bool ParserTablePropertiesQuery::parseImpl(Pos & pos, ASTPtr & node, Expected &
 {
     ParserKeyword s_exists("EXISTS");
     ParserKeyword s_temporary("TEMPORARY");
-    ParserKeyword s_describe("DESCRIBE");
-    ParserKeyword s_desc("DESC");
     ParserKeyword s_show("SHOW");
     ParserKeyword s_create("CREATE");
     ParserKeyword s_database("DATABASE");
diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp
index b91ad0b963a..b6359bbb251 100644
--- a/src/Storages/StorageDistributed.cpp
+++ b/src/Storages/StorageDistributed.cpp
@@ -60,7 +60,6 @@
 #include <Interpreters/Cluster.h>
 #include <Interpreters/DatabaseAndTableWithAlias.h>
 #include <Interpreters/ExpressionAnalyzer.h>
-#include <Interpreters/InterpreterDescribeQuery.h>
 #include <Interpreters/InterpreterSelectQuery.h>
 #include <Interpreters/InterpreterSelectQueryAnalyzer.h>
 #include <Interpreters/InterpreterInsertQuery.h>
diff --git a/src/Storages/getStructureOfRemoteTable.cpp b/src/Storages/getStructureOfRemoteTable.cpp
index e5fc01be9f4..ec8f27feeda 100644
--- a/src/Storages/getStructureOfRemoteTable.cpp
+++ b/src/Storages/getStructureOfRemoteTable.cpp
@@ -2,7 +2,6 @@
 #include <Interpreters/Cluster.h>
 #include <Interpreters/Context.h>
 #include <Interpreters/ClusterProxy/executeQuery.h>
-#include <Interpreters/InterpreterDescribeQuery.h>
 #include <QueryPipeline/RemoteQueryExecutor.h>
 #include <DataTypes/DataTypeFactory.h>
 #include <DataTypes/DataTypeString.h>
diff --git a/tests/queries/0_stateless/02789_describe_table_settings.reference b/tests/queries/0_stateless/02789_describe_table_settings.reference
new file mode 100644
index 00000000000..c2bf9219f4d
--- /dev/null
+++ b/tests/queries/0_stateless/02789_describe_table_settings.reference
@@ -0,0 +1,10 @@
+"id","Nullable(Int64)","","","","",""
+"age","LowCardinality(UInt8)","","","","",""
+"name","Nullable(String)","","","","",""
+"status","Nullable(String)","","","","",""
+"hobbies","Array(Nullable(String))","","","","",""
+"id","Nullable(Int64)","","","","",""
+"age","LowCardinality(UInt8)","","","","",""
+"name","Nullable(String)","","","","",""
+"status","Nullable(String)","","","","",""
+"hobbies","Array(Nullable(String))","","","","",""
diff --git a/tests/queries/0_stateless/02789_describe_table_settings.sql b/tests/queries/0_stateless/02789_describe_table_settings.sql
new file mode 100644
index 00000000000..64b5b21fea8
--- /dev/null
+++ b/tests/queries/0_stateless/02789_describe_table_settings.sql
@@ -0,0 +1,3 @@
+DESC format(JSONEachRow, '{"id" : 1, "age" : 25, "name" : "Josh", "status" : null, "hobbies" : ["football", "cooking"]}') SETTINGS schema_inference_hints = 'age LowCardinality(UInt8), status Nullable(String)', allow_suspicious_low_cardinality_types=1 FORMAT CSV;
+DESC format(JSONEachRow, '{"id" : 1, "age" : 25, "name" : "Josh", "status" : null, "hobbies" : ["football", "cooking"]}') FORMAT CSV SETTINGS schema_inference_hints = 'age LowCardinality(UInt8), status Nullable(String)', allow_suspicious_low_cardinality_types=1;
+DESC format(JSONEachRow, '{"id" : 1, "age" : 25, "name" : "Josh", "status" : null, "hobbies" : ["football", "cooking"]}') FORMAT CSV SETTINGS schema_inference_hints = 'age LowCardinality(UInt8), status Nullable(String)', allow_suspicious_low_cardinality_types=1 SETTINGS max_threads=0; -- { clientError SYNTAX_ERROR }

From 9a295eca46fea2c88d1c1767fc4625b31c999572 Mon Sep 17 00:00:00 2001
From: Robert Schulze <robert@clickhouse.com>
Date: Thu, 6 Jul 2023 14:28:50 +0000
Subject: [PATCH 052/242] Incorporate review feedback

---
 docs/en/sql-reference/statements/show.md      | 24 ++---
 .../InterpreterShowIndexesQuery.cpp           |  4 +-
 .../0_stateless/02724_show_indexes.reference  | 88 +++++++++----------
 3 files changed, 58 insertions(+), 58 deletions(-)

diff --git a/docs/en/sql-reference/statements/show.md b/docs/en/sql-reference/statements/show.md
index 1a1e4dbd2c7..1c399d2072b 100644
--- a/docs/en/sql-reference/statements/show.md
+++ b/docs/en/sql-reference/statements/show.md
@@ -289,18 +289,18 @@ The statement produces a result table with the following structure:
 - table - The name of the table. (String)
 - non_unique - Always `1` as ClickHouse does not support uniqueness constraints. (UInt8)
 - key_name - The name of the index, `PRIMARY` if the index is a primary key index. (String)
-- column_name - For a primary key index, the name of the column. For a data skipping index: '' (empty string), see field "expression". (String)
 - seq_in_index - For a primary key index, the position of the column starting from `1`. For a data skipping index: always `1`. (UInt8)
+- column_name - For a primary key index, the name of the column. For a data skipping index: `''` (empty string), see field "expression". (String)
 - collation - The sorting of the column in the index: `A` if ascending, `D` if descending, `NULL` if unsorted. (Nullable(String))
 - cardinality - An estimation of the index cardinality (number of unique values in the index). Currently always 0. (UInt64)
 - sub_part - Always `NULL` because ClickHouse does not support index prefixes like MySQL. (Nullable(String))
 - packed - Always `NULL` because ClickHouse does not support packed indexes (like MySQL). (Nullable(String))
 - null - Currently unused
 - index_type - The index type, e.g. `PRIMARY`, `MINMAX`, `BLOOM_FILTER` etc. (String)
-- comment - Additional information about the index, currently always `` (empty string). (String)
-- index_comment - `` (empty string) because indexes in ClickHouse cannot have a `COMMENT` field (like in MySQL). (String)
+- comment - Additional information about the index, currently always `''` (empty string). (String)
+- index_comment - `''` (empty string) because indexes in ClickHouse cannot have a `COMMENT` field (like in MySQL). (String)
 - visible - If the index is visible to the optimizer, always `YES`. (String)
-- expression - For a data skipping index, the index expression. For a primary key index: '' (empty string). (String)
+- expression - For a data skipping index, the index expression. For a primary key index: `''` (empty string). (String)
 
 **Examples**
 
@@ -313,14 +313,14 @@ SHOW INDEX FROM 'tbl'
 Result:
 
 ``` text
-┌─table─┬─non_unique─┬─key_name─┬─column_name─┬─seq_in_index─┬─collation─┬─cardinality─┬─sub_part─┬─packed─┬─null─┬─index_type───┬─comment─┬─index_comment─┬─visible─┬─expression─┐
-│ tbl   │          1 │ blf_idx  │ 1           │ 1            │ ᴺᵁᴸᴸ      │ 0           │ ᴺᵁᴸᴸ     │ ᴺᵁᴸᴸ   │ ᴺᵁᴸᴸ │ BLOOM_FILTER │         │               │ YES     │ d, b       │
-│ tbl   │          1 │ mm1_idx  │ 1           │ 1            │ ᴺᵁᴸᴸ      │ 0           │ ᴺᵁᴸᴸ     │ ᴺᵁᴸᴸ   │ ᴺᵁᴸᴸ │ MINMAX       │         │               │ YES     │ a, c, d    │
-│ tbl   │          1 │ mm2_idx  │ 1           │ 1            │ ᴺᵁᴸᴸ      │ 0           │ ᴺᵁᴸᴸ     │ ᴺᵁᴸᴸ   │ ᴺᵁᴸᴸ │ MINMAX       │         │               │ YES     │ c, d, e    │
-│ tbl   │          1 │ PRIMARY  │ c           │ 1            │ A         │ 0           │ ᴺᵁᴸᴸ     │ ᴺᵁᴸᴸ   │ ᴺᵁᴸᴸ │ PRIMARY      │         │               │ YES     │            │
-│ tbl   │          1 │ PRIMARY  │ a           │ 2            │ A         │ 0           │ ᴺᵁᴸᴸ     │ ᴺᵁᴸᴸ   │ ᴺᵁᴸᴸ │ PRIMARY      │         │               │ YES     │            │
-│ tbl   │          1 │ set_idx  │ 1           │ 1            │ ᴺᵁᴸᴸ      │ 0           │ ᴺᵁᴸᴸ     │ ᴺᵁᴸᴸ   │ ᴺᵁᴸᴸ │ SET          │         │               │ YES     │ e          │
-└───────┴────────────┴──────────┴─────────────┴──────────────┴───────────┴─────────────┴──────────┴────────┴──────┴──────────────┴─────────┴───────────────┴─────────┴────────────┘
+┌─table─┬─non_unique─┬─key_name─┬─seq_in_index─┬─column_name─┬─collation─┬─cardinality─┬─sub_part─┬─packed─┬─null─┬─index_type───┬─comment─┬─index_comment─┬─visible─┬─expression─┐
+│ tbl   │          1 │ blf_idx  │ 1            │             │ ᴺᵁᴸᴸ      │ 0           │ ᴺᵁᴸᴸ     │ ᴺᵁᴸᴸ   │ ᴺᵁᴸᴸ │ BLOOM_FILTER │         │               │ YES     │ d, b       │
+│ tbl   │          1 │ mm1_idx  │ 1            │             │ ᴺᵁᴸᴸ      │ 0           │ ᴺᵁᴸᴸ     │ ᴺᵁᴸᴸ   │ ᴺᵁᴸᴸ │ MINMAX       │         │               │ YES     │ a, c, d    │
+│ tbl   │          1 │ mm2_idx  │ 1            │             │ ᴺᵁᴸᴸ      │ 0           │ ᴺᵁᴸᴸ     │ ᴺᵁᴸᴸ   │ ᴺᵁᴸᴸ │ MINMAX       │         │               │ YES     │ c, d, e    │
+│ tbl   │          1 │ PRIMARY  │ 1            │ c           │ A         │ 0           │ ᴺᵁᴸᴸ     │ ᴺᵁᴸᴸ   │ ᴺᵁᴸᴸ │ PRIMARY      │         │               │ YES     │            │
+│ tbl   │          1 │ PRIMARY  │ 2            │ a           │ A         │ 0           │ ᴺᵁᴸᴸ     │ ᴺᵁᴸᴸ   │ ᴺᵁᴸᴸ │ PRIMARY      │         │               │ YES     │            │
+│ tbl   │          1 │ set_idx  │ 1            │             │ ᴺᵁᴸᴸ      │ 0           │ ᴺᵁᴸᴸ     │ ᴺᵁᴸᴸ   │ ᴺᵁᴸᴸ │ SET          │         │               │ YES     │ e          │
+└───────┴────────────┴──────────┴──────────────┴─────────────┴───────────┴─────────────┴──────────┴────────┴──────┴──────────────┴─────────┴───────────────┴─────────┴────────────┘
 ```
 
 **See also**
diff --git a/src/Interpreters/InterpreterShowIndexesQuery.cpp b/src/Interpreters/InterpreterShowIndexesQuery.cpp
index 35f32a79310..149420006fb 100644
--- a/src/Interpreters/InterpreterShowIndexesQuery.cpp
+++ b/src/Interpreters/InterpreterShowIndexesQuery.cpp
@@ -42,8 +42,8 @@ FROM (
             name AS table,
             1 AS non_unique,
             'PRIMARY' AS key_name,
-            arrayJoin(splitByString(', ', primary_key)) AS column_name,
             row_number() over (order by column_name) AS seq_in_index,
+            arrayJoin(splitByString(', ', primary_key)) AS column_name,
             'A' AS collation,
             0 AS cardinality,
             NULL AS sub_part,
@@ -63,8 +63,8 @@ FROM (
             table AS table,
             1 AS non_unique,
             name AS key_name,
-            '' AS column_name,
             1 AS seq_in_index,
+            '' AS column_name,
             NULL AS collation,
             0 AS cardinality,
             NULL AS sub_part,
diff --git a/tests/queries/0_stateless/02724_show_indexes.reference b/tests/queries/0_stateless/02724_show_indexes.reference
index cee0598d625..e41f2521f5c 100644
--- a/tests/queries/0_stateless/02724_show_indexes.reference
+++ b/tests/queries/0_stateless/02724_show_indexes.reference
@@ -1,51 +1,51 @@
 --- Aliases of SHOW INDEX
-tbl	1	blf_idx		1	\N	0	\N	\N	\N	BLOOM_FILTER			YES	d, b
-tbl	1	mm1_idx		1	\N	0	\N	\N	\N	MINMAX			YES	a, c, d
-tbl	1	mm2_idx		1	\N	0	\N	\N	\N	MINMAX			YES	c, d, e
-tbl	1	PRIMARY	a	1	A	0	\N	\N	\N	PRIMARY			YES	
-tbl	1	PRIMARY	c	2	A	0	\N	\N	\N	PRIMARY			YES	
-tbl	1	set_idx		1	\N	0	\N	\N	\N	SET			YES	e
-tbl	1	blf_idx		1	\N	0	\N	\N	\N	BLOOM_FILTER			YES	d, b
-tbl	1	mm1_idx		1	\N	0	\N	\N	\N	MINMAX			YES	a, c, d
-tbl	1	mm2_idx		1	\N	0	\N	\N	\N	MINMAX			YES	c, d, e
-tbl	1	PRIMARY	a	1	A	0	\N	\N	\N	PRIMARY			YES	
-tbl	1	PRIMARY	c	2	A	0	\N	\N	\N	PRIMARY			YES	
-tbl	1	set_idx		1	\N	0	\N	\N	\N	SET			YES	e
-tbl	1	blf_idx		1	\N	0	\N	\N	\N	BLOOM_FILTER			YES	d, b
-tbl	1	mm1_idx		1	\N	0	\N	\N	\N	MINMAX			YES	a, c, d
-tbl	1	mm2_idx		1	\N	0	\N	\N	\N	MINMAX			YES	c, d, e
-tbl	1	PRIMARY	a	1	A	0	\N	\N	\N	PRIMARY			YES	
-tbl	1	PRIMARY	c	2	A	0	\N	\N	\N	PRIMARY			YES	
-tbl	1	set_idx		1	\N	0	\N	\N	\N	SET			YES	e
-tbl	1	blf_idx		1	\N	0	\N	\N	\N	BLOOM_FILTER			YES	d, b
-tbl	1	mm1_idx		1	\N	0	\N	\N	\N	MINMAX			YES	a, c, d
-tbl	1	mm2_idx		1	\N	0	\N	\N	\N	MINMAX			YES	c, d, e
-tbl	1	PRIMARY	a	1	A	0	\N	\N	\N	PRIMARY			YES	
-tbl	1	PRIMARY	c	2	A	0	\N	\N	\N	PRIMARY			YES	
-tbl	1	set_idx		1	\N	0	\N	\N	\N	SET			YES	e
+tbl	1	blf_idx	1		\N	0	\N	\N	\N	BLOOM_FILTER			YES	d, b
+tbl	1	mm1_idx	1		\N	0	\N	\N	\N	MINMAX			YES	a, c, d
+tbl	1	mm2_idx	1		\N	0	\N	\N	\N	MINMAX			YES	c, d, e
+tbl	1	PRIMARY	1	a	A	0	\N	\N	\N	PRIMARY			YES	
+tbl	1	PRIMARY	2	c	A	0	\N	\N	\N	PRIMARY			YES	
+tbl	1	set_idx	1		\N	0	\N	\N	\N	SET			YES	e
+tbl	1	blf_idx	1		\N	0	\N	\N	\N	BLOOM_FILTER			YES	d, b
+tbl	1	mm1_idx	1		\N	0	\N	\N	\N	MINMAX			YES	a, c, d
+tbl	1	mm2_idx	1		\N	0	\N	\N	\N	MINMAX			YES	c, d, e
+tbl	1	PRIMARY	1	a	A	0	\N	\N	\N	PRIMARY			YES	
+tbl	1	PRIMARY	2	c	A	0	\N	\N	\N	PRIMARY			YES	
+tbl	1	set_idx	1		\N	0	\N	\N	\N	SET			YES	e
+tbl	1	blf_idx	1		\N	0	\N	\N	\N	BLOOM_FILTER			YES	d, b
+tbl	1	mm1_idx	1		\N	0	\N	\N	\N	MINMAX			YES	a, c, d
+tbl	1	mm2_idx	1		\N	0	\N	\N	\N	MINMAX			YES	c, d, e
+tbl	1	PRIMARY	1	a	A	0	\N	\N	\N	PRIMARY			YES	
+tbl	1	PRIMARY	2	c	A	0	\N	\N	\N	PRIMARY			YES	
+tbl	1	set_idx	1		\N	0	\N	\N	\N	SET			YES	e
+tbl	1	blf_idx	1		\N	0	\N	\N	\N	BLOOM_FILTER			YES	d, b
+tbl	1	mm1_idx	1		\N	0	\N	\N	\N	MINMAX			YES	a, c, d
+tbl	1	mm2_idx	1		\N	0	\N	\N	\N	MINMAX			YES	c, d, e
+tbl	1	PRIMARY	1	a	A	0	\N	\N	\N	PRIMARY			YES	
+tbl	1	PRIMARY	2	c	A	0	\N	\N	\N	PRIMARY			YES	
+tbl	1	set_idx	1		\N	0	\N	\N	\N	SET			YES	e
 --- EXTENDED
-tbl	1	blf_idx		1	\N	0	\N	\N	\N	BLOOM_FILTER			YES	d, b
-tbl	1	mm1_idx		1	\N	0	\N	\N	\N	MINMAX			YES	a, c, d
-tbl	1	mm2_idx		1	\N	0	\N	\N	\N	MINMAX			YES	c, d, e
-tbl	1	PRIMARY	a	1	A	0	\N	\N	\N	PRIMARY			YES	
-tbl	1	PRIMARY	c	2	A	0	\N	\N	\N	PRIMARY			YES	
-tbl	1	set_idx		1	\N	0	\N	\N	\N	SET			YES	e
+tbl	1	blf_idx	1		\N	0	\N	\N	\N	BLOOM_FILTER			YES	d, b
+tbl	1	mm1_idx	1		\N	0	\N	\N	\N	MINMAX			YES	a, c, d
+tbl	1	mm2_idx	1		\N	0	\N	\N	\N	MINMAX			YES	c, d, e
+tbl	1	PRIMARY	1	a	A	0	\N	\N	\N	PRIMARY			YES	
+tbl	1	PRIMARY	2	c	A	0	\N	\N	\N	PRIMARY			YES	
+tbl	1	set_idx	1		\N	0	\N	\N	\N	SET			YES	e
 --- WHERE
 --- Check with weird table names
-$4@^7	1	PRIMARY	c	1	A	0	\N	\N	\N	PRIMARY			YES	
-NULL	1	PRIMARY	c	1	A	0	\N	\N	\N	PRIMARY			YES	
-\'	1	PRIMARY	c	1	A	0	\N	\N	\N	PRIMARY			YES	
-\'	1	PRIMARY	c	1	A	0	\N	\N	\N	PRIMARY			YES	
+$4@^7	1	PRIMARY	1	c	A	0	\N	\N	\N	PRIMARY			YES	
+NULL	1	PRIMARY	1	c	A	0	\N	\N	\N	PRIMARY			YES	
+\'	1	PRIMARY	1	c	A	0	\N	\N	\N	PRIMARY			YES	
+\'	1	PRIMARY	1	c	A	0	\N	\N	\N	PRIMARY			YES	
 --- Original table
-tbl	1	blf_idx		1	\N	0	\N	\N	\N	BLOOM_FILTER			YES	d, b
-tbl	1	mm1_idx		1	\N	0	\N	\N	\N	MINMAX			YES	a, c, d
-tbl	1	mm2_idx		1	\N	0	\N	\N	\N	MINMAX			YES	c, d, e
-tbl	1	PRIMARY	a	1	A	0	\N	\N	\N	PRIMARY			YES	
-tbl	1	PRIMARY	c	2	A	0	\N	\N	\N	PRIMARY			YES	
-tbl	1	set_idx		1	\N	0	\N	\N	\N	SET			YES	e
+tbl	1	blf_idx	1		\N	0	\N	\N	\N	BLOOM_FILTER			YES	d, b
+tbl	1	mm1_idx	1		\N	0	\N	\N	\N	MINMAX			YES	a, c, d
+tbl	1	mm2_idx	1		\N	0	\N	\N	\N	MINMAX			YES	c, d, e
+tbl	1	PRIMARY	1	a	A	0	\N	\N	\N	PRIMARY			YES	
+tbl	1	PRIMARY	2	c	A	0	\N	\N	\N	PRIMARY			YES	
+tbl	1	set_idx	1		\N	0	\N	\N	\N	SET			YES	e
 --- Equally named table in other database
-tbl	1	mmi_idx		1	\N	0	\N	\N	\N	MINMAX			YES	b
-tbl	1	PRIMARY	a	1	A	0	\N	\N	\N	PRIMARY			YES	
+tbl	1	mmi_idx	1		\N	0	\N	\N	\N	MINMAX			YES	b
+tbl	1	PRIMARY	1	a	A	0	\N	\N	\N	PRIMARY			YES	
 --- Short form
-tbl	1	mmi_idx		1	\N	0	\N	\N	\N	MINMAX			YES	b
-tbl	1	PRIMARY	a	1	A	0	\N	\N	\N	PRIMARY			YES	
+tbl	1	mmi_idx	1		\N	0	\N	\N	\N	MINMAX			YES	b
+tbl	1	PRIMARY	1	a	A	0	\N	\N	\N	PRIMARY			YES	

From 7255c35edcefe03a39ad7bcf460d9dca5670ca3b Mon Sep 17 00:00:00 2001
From: Dmitry Kardymon <d.kardymon@arenadata.io>
Date: Thu, 6 Jul 2023 19:43:37 +0000
Subject: [PATCH 053/242] Add more tests

---
 .../functions/string-search-functions.md      | 50 +++++++++++++
 .../functions/string-search-functions.md      | 52 +++++++++++++
 src/Functions/HasSubsequenceImpl.h            | 74 ++++++++++++-------
 src/Functions/hasSubsequence.cpp              |  2 +-
 .../hasSubsequenceCaseInsensitive.cpp         |  2 +-
 .../hasSubsequenceCaseInsensitiveUTF8.cpp     | 28 +++++++
 src/Functions/hasSubsequenceUTF8.cpp          | 29 ++++++++
 .../02809_has_subsequence.reference           | 13 +++-
 .../0_stateless/02809_has_subsequence.sql     | 20 ++++-
 9 files changed, 237 insertions(+), 33 deletions(-)
 create mode 100644 src/Functions/hasSubsequenceCaseInsensitiveUTF8.cpp
 create mode 100644 src/Functions/hasSubsequenceUTF8.cpp

diff --git a/docs/en/sql-reference/functions/string-search-functions.md b/docs/en/sql-reference/functions/string-search-functions.md
index 3d8f89f7295..04ad6474310 100644
--- a/docs/en/sql-reference/functions/string-search-functions.md
+++ b/docs/en/sql-reference/functions/string-search-functions.md
@@ -631,3 +631,53 @@ Result:
 │ 100                                          │ 200                                          │ 100-200                                      │ 100                                       │
 └──────────────────────────────────────────────┴──────────────────────────────────────────────┴──────────────────────────────────────────────┴───────────────────────────────────────────┘
 ```
+
+## hasSubsequence
+
+Returns 1 if needle is a subsequence of haystack, or 0 otherwise.
+A subsequence of a string is a sequence that can be derived from the given string by deleting zero or more elements without changing the order of the remaining elements.
+
+
+**Syntax**
+
+``` sql
+hasSubsequence(haystack, needle)
+```
+
+**Arguments**
+
+- `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
+- `needle` — Subsequence to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal).
+
+**Returned values**
+
+- 1, if needle is a subsequence of haystack.
+- 0, otherwise.
+
+Type: `UInt8`.
+
+**Examples**
+
+``` sql
+SELECT hasSubsequence('garbage', 'arg') ;
+```
+
+Result:
+
+``` text
+┌─hasSubsequence('garbage', 'arg')─┐
+│                                1 │
+└──────────────────────────────────┘
+```
+
+## hasSubsequenceCaseInsensitive
+
+Like [hasSubsequence](#hasSubsequence) but searches case-insensitively.
+
+## hasSubsequenceUTF8
+
+Like [hasSubsequence](#hasSubsequence) but assumes `haystack` and `needle` are UTF-8 encoded strings.
+
+## hasSubsequenceCaseInsensitiveUTF8
+
+Like [hasSubsequenceUTF8](#hasSubsequenceUTF8) but searches case-insensitively.
\ No newline at end of file
diff --git a/docs/ru/sql-reference/functions/string-search-functions.md b/docs/ru/sql-reference/functions/string-search-functions.md
index ea4f90d4f66..21989e882b6 100644
--- a/docs/ru/sql-reference/functions/string-search-functions.md
+++ b/docs/ru/sql-reference/functions/string-search-functions.md
@@ -801,3 +801,55 @@ SELECT countSubstringsCaseInsensitiveUTF8('аБв__АбВ__абв', 'Абв');
 │                                                          3 │
 └────────────────────────────────────────────────────────────┘
 ```
+
+## hasSubsequence(haystack, needle) {#hasSubsequence}
+
+Возвращает 1, если needle является подпоследовательностью haystack, иначе 0.
+
+
+**Синтаксис**
+
+``` sql
+hasSubsequence(haystack, needle)
+```
+
+**Аргументы**
+
+-   `haystack` — строка, по которой выполняется поиск. [Строка](../syntax.md#syntax-string-literal).
+-   `needle` — подпоследовательность, которую необходимо найти. [Строка](../syntax.md#syntax-string-literal).
+
+**Возвращаемые значения**
+
+-   1, если needle является подпоследовательностью haystack.
+-   0, если needle не является подпоследовательностью haystack.
+
+Тип: `UInt8`.
+
+**Примеры**
+
+Запрос:
+
+``` sql
+SELECT hasSubsequence('garbage', 'arg') ;
+```
+
+Результат:
+
+``` text
+┌─hasSubsequence('garbage', 'arg')─┐
+│                                1 │
+└──────────────────────────────────┘
+```
+
+
+## hasSubsequenceCaseInsensitive
+
+Такая же, как и [hasSubsequence](#hasSubsequence), но работает без учета регистра.
+
+## hasSubsequenceUTF8
+
+Такая же, как и [hasSubsequence](#hasSubsequence), при допущении, что `haystack` и `needle` содержат набор кодовых точек, представляющий текст в кодировке UTF-8.
+
+## hasSubsequenceCaseInsensitiveUTF8
+
+Такая же, как и [hasSubsequenceUTF8](#hasSubsequenceUTF8), но работает без учета регистра.
diff --git a/src/Functions/HasSubsequenceImpl.h b/src/Functions/HasSubsequenceImpl.h
index 3a29ef68b0b..bcb8e8e99e6 100644
--- a/src/Functions/HasSubsequenceImpl.h
+++ b/src/Functions/HasSubsequenceImpl.h
@@ -1,11 +1,8 @@
 #pragma once
-
-
 namespace DB
 {
 namespace
 {
-
 template <typename Name, typename Impl>
 struct HasSubsequenceImpl
 {
@@ -17,23 +14,31 @@ struct HasSubsequenceImpl
 
     static ColumnNumbers getArgumentsThatAreAlwaysConstant() { return {};}
 
-    /// Find one substring in many strings.
     static void vectorConstant(
-        const ColumnString::Chars & /*haystack_data*/,
-        const ColumnString::Offsets & /*haystack_offsets*/,
-        const std::string & /*needle*/,
+        const ColumnString::Chars & haystack_data,
+        const ColumnString::Offsets & haystack_offsets,
+        const String & needle,
         const ColumnPtr & /*start_pos*/,
         PaddedPODArray<UInt8> & res,
         [[maybe_unused]] ColumnUInt8 * /*res_null*/)
     {
-        size_t size = res.size();
-        for (size_t i = 0; i < size; ++i)
+        if (needle.empty())
         {
-            res[i] = 0;
+            for (auto & r : res)
+                r = 1;
+            return;
+        }
+
+        ColumnString::Offset prev_haystack_offset = 0;
+        for (size_t i = 0; i < haystack_offsets.size(); ++i)
+        {
+            size_t haystack_size = haystack_offsets[i] - prev_haystack_offset - 1;
+            const char * haystack = reinterpret_cast<const char *>(&haystack_data[prev_haystack_offset]);
+            res[i] = hasSubsequence(haystack, haystack_size, needle.c_str(), needle.size());
+            prev_haystack_offset = haystack_offsets[i];
         }
     }
 
-    /// Search each time for a different single substring inside each time different string.
     static void vectorVector(
         const ColumnString::Chars & haystack_data,
         const ColumnString::Offsets & haystack_offsets,
@@ -61,7 +66,7 @@ struct HasSubsequenceImpl
             {
                 const char * needle = reinterpret_cast<const char *>(&needle_data[prev_needle_offset]);
                 const char * haystack = reinterpret_cast<const char *>(&haystack_data[prev_haystack_offset]);
-                res[i] = impl(haystack, haystack_size, needle, needle_size);
+                res[i] = hasSubsequence(haystack, haystack_size, needle, needle_size);
             }
 
             prev_haystack_offset = haystack_offsets[i];
@@ -69,35 +74,38 @@ struct HasSubsequenceImpl
         }
     }
 
-    /// Find many substrings in single string.
     static void constantVector(
-        const String & /*haystack*/,
-        const ColumnString::Chars & /*needle_data*/,
+        const String & haystack,
+        const ColumnString::Chars & needle_data,
         const ColumnString::Offsets & needle_offsets,
         const ColumnPtr & /*start_pos*/,
         PaddedPODArray<UInt8> & res,
         ColumnUInt8 * /*res_null*/)
     {
+        ColumnString::Offset prev_needle_offset = 0;
+
         size_t size = needle_offsets.size();
 
         for (size_t i = 0; i < size; ++i)
         {
-            res[i] = 0;
+            size_t needle_size = needle_offsets[i] - prev_needle_offset - 1;
+
+            if (0 == needle_size)
+            {
+                res[i] = 1;
+            }
+            else
+            {
+                const char * needle = reinterpret_cast<const char *>(&needle_data[prev_needle_offset]);
+                res[i] = hasSubsequence(haystack.c_str(), haystack.size(), needle, needle_size);
+            }
+            prev_needle_offset = needle_offsets[i];
         }
     }
 
-    static UInt8 impl(const char * haystack, size_t haystack_size, const char * needle, size_t needle_size)
-    {
-        size_t j = 0;
-        for (size_t i = 0; (i < haystack_size) && (j < needle_size); i++)
-            if (needle[j] == haystack[i])
-                ++j;
-        return j == needle_size;
-    }
-
     static void constantConstant(
-        std::string haystack,
-        std::string needle,
+        String haystack,
+        String needle,
         const ColumnPtr & /*start_pos*/,
         PaddedPODArray<UInt8> & res,
         ColumnUInt8 * /*res_null*/)
@@ -106,13 +114,23 @@ struct HasSubsequenceImpl
         Impl::toLowerIfNeed(haystack);
         Impl::toLowerIfNeed(needle);
 
-        UInt8 result = impl(haystack.c_str(), haystack.size(), needle.c_str(), needle.size());
+        UInt8 result = hasSubsequence(haystack.c_str(), haystack.size(), needle.c_str(), needle.size());
 
         for (size_t i = 0; i < size; ++i)
         {
             res[i] = result;
         }
     }
+
+    static UInt8 hasSubsequence(const char * haystack, size_t haystack_size, const char * needle, size_t needle_size)
+    {
+        size_t j = 0;
+        for (size_t i = 0; (i < haystack_size) && (j < needle_size); i++)
+            if (needle[j] == haystack[i])
+                ++j;
+        return j == needle_size;
+    }
+
     template <typename... Args>
     static void vectorFixedConstant(Args &&...)
     {
diff --git a/src/Functions/hasSubsequence.cpp b/src/Functions/hasSubsequence.cpp
index da2aaddcf50..bb1f295cee4 100644
--- a/src/Functions/hasSubsequence.cpp
+++ b/src/Functions/hasSubsequence.cpp
@@ -10,7 +10,7 @@ namespace
 
 struct HasSubsequenceCaseSensitiveASCII
 {
-    static void toLowerIfNeed(std::string & /*s*/) { }
+    static void toLowerIfNeed(String & /*s*/) { }
 };
 
 struct NameHasSubsequence
diff --git a/src/Functions/hasSubsequenceCaseInsensitive.cpp b/src/Functions/hasSubsequenceCaseInsensitive.cpp
index f5c13a7cf8c..fe50ada9be9 100644
--- a/src/Functions/hasSubsequenceCaseInsensitive.cpp
+++ b/src/Functions/hasSubsequenceCaseInsensitive.cpp
@@ -9,7 +9,7 @@ namespace
 
 struct HasSubsequenceCaseInsensitiveASCII
 {
-    static void toLowerIfNeed(std::string & s) { std::transform(std::begin(s), std::end(s), std::begin(s), tolower); }
+    static void toLowerIfNeed(String & s) { std::transform(std::begin(s), std::end(s), std::begin(s), tolower); }
 };
 
 struct NameHasSubsequenceCaseInsensitive
diff --git a/src/Functions/hasSubsequenceCaseInsensitiveUTF8.cpp b/src/Functions/hasSubsequenceCaseInsensitiveUTF8.cpp
new file mode 100644
index 00000000000..2908c284a25
--- /dev/null
+++ b/src/Functions/hasSubsequenceCaseInsensitiveUTF8.cpp
@@ -0,0 +1,28 @@
+#include <Functions/FunctionFactory.h>
+#include <Functions/FunctionsStringSearch.h>
+#include <Functions/HasSubsequenceImpl.h>
+
+namespace DB
+{
+namespace
+{
+
+struct HasSubsequenceCaseInsensitiveUTF8
+{
+    static void toLowerIfNeed(String & s) { std::transform(std::begin(s), std::end(s), std::begin(s), tolower); }
+};
+
+struct NameHasSubsequenceCaseInsensitiveUTF8
+{
+    static constexpr auto name = "hasSubsequenceCaseInsensitiveUTF8";
+};
+
+using FunctionHasSubsequenceCaseInsensitiveUTF8 = FunctionsStringSearch<HasSubsequenceImpl<NameHasSubsequenceCaseInsensitiveUTF8, HasSubsequenceCaseInsensitiveUTF8>>;
+}
+
+REGISTER_FUNCTION(hasSubsequenceCaseInsensitiveUTF8)
+{
+    factory.registerFunction<FunctionHasSubsequenceCaseInsensitiveUTF8>({}, FunctionFactory::CaseInsensitive);
+}
+
+}
diff --git a/src/Functions/hasSubsequenceUTF8.cpp b/src/Functions/hasSubsequenceUTF8.cpp
new file mode 100644
index 00000000000..c0811de6575
--- /dev/null
+++ b/src/Functions/hasSubsequenceUTF8.cpp
@@ -0,0 +1,29 @@
+#include <Functions/FunctionFactory.h>
+#include <Functions/FunctionsStringSearch.h>
+#include <Functions/HasSubsequenceImpl.h>
+
+
+namespace DB
+{
+namespace
+{
+
+struct HasSubsequenceCaseSensitiveUTF8
+{
+    static void toLowerIfNeed(String & /*s*/) { }
+};
+
+struct NameHasSubsequenceUTF8
+{
+    static constexpr auto name = "hasSubsequenceUTF8";
+};
+
+using FunctionHasSubsequenceUTF8 = FunctionsStringSearch<HasSubsequenceImpl<NameHasSubsequenceUTF8, HasSubsequenceCaseSensitiveUTF8>>;
+}
+
+REGISTER_FUNCTION(hasSubsequenceUTF8)
+{
+    factory.registerFunction<FunctionHasSubsequenceUTF8>({}, FunctionFactory::CaseInsensitive);
+}
+
+}
diff --git a/tests/queries/0_stateless/02809_has_subsequence.reference b/tests/queries/0_stateless/02809_has_subsequence.reference
index 827caa105d0..d12c0ba9fb3 100644
--- a/tests/queries/0_stateless/02809_has_subsequence.reference
+++ b/tests/queries/0_stateless/02809_has_subsequence.reference
@@ -1,3 +1,4 @@
+hasSubsequence / const / const
 1
 1
 1
@@ -10,7 +11,17 @@
 0
 0
 0
+hasSubsequence / const / string
 1
 1
+0
+hasSubsequence / string / const
+1
+1
+0
+hasSubsequence / string / string
+1
+1
+0
+hasSubsequenceCaseInsensitive / const / const
 1
-0
\ No newline at end of file
diff --git a/tests/queries/0_stateless/02809_has_subsequence.sql b/tests/queries/0_stateless/02809_has_subsequence.sql
index 63ffb49dc54..64f3fd8dc77 100644
--- a/tests/queries/0_stateless/02809_has_subsequence.sql
+++ b/tests/queries/0_stateless/02809_has_subsequence.sql
@@ -1,3 +1,4 @@
+select 'hasSubsequence / const / const';
 select hasSubsequence('garbage', '');
 select hasSubsequence('garbage', 'g');
 select hasSubsequence('garbage', 'a');
@@ -12,8 +13,23 @@ select hasSubsequence('garbage', 'garbage1');
 select hasSubsequence('garbage', 'arbw');
 select hasSubsequence('garbage', 'ARG');
 
-select hasSubsequenceCaseInsensitive('garbage', 'ARG');
+select 'hasSubsequence / const / string';
+select hasSubsequence('garbage', materialize(''));
+select hasSubsequence('garbage', materialize('arg'));
+select hasSubsequence('garbage', materialize('arbw'));
+
+select 'hasSubsequence / string / const';
+select hasSubsequence(materialize('garbage'), '');
+select hasSubsequence(materialize('garbage'), 'arg');
+select hasSubsequence(materialize('garbage'), 'arbw');
+
+select 'hasSubsequence / string / string';
 
 select hasSubsequence(materialize('garbage'), materialize(''));
 select hasSubsequence(materialize('garbage'), materialize('arg'));
-select hasSubsequence(materialize('garbage'), materialize('garbage1'));
\ No newline at end of file
+select hasSubsequence(materialize('garbage'), materialize('garbage1'));
+
+select 'hasSubsequenceCaseInsensitive / const / const';
+
+select hasSubsequenceCaseInsensitive('garbage', 'ARG');
+

From ef3551fea00b6eeaa76884880a977e9a0768bb82 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@clickhouse.com>
Date: Fri, 7 Jul 2023 03:54:10 +0200
Subject: [PATCH 054/242] Maybe better tests

---
 tests/queries/0_stateless/00995_exception_while_insert.sh      | 3 +--
 tests/queries/0_stateless/01030_limit_by_with_ties_error.sh    | 3 +--
 tests/queries/0_stateless/01187_set_profile_as_setting.sh      | 2 +-
 tests/queries/0_stateless/01442_merge_detach_attach_long.sh    | 3 +--
 tests/queries/0_stateless/01515_logtrace_function.sh           | 3 +--
 .../01583_parallel_parsing_exception_with_offset.sh            | 3 +--
 tests/queries/0_stateless/02359_send_logs_source_regexp.sh     | 2 +-
 .../0_stateless/02360_rename_table_along_with_log_name.sh      | 2 +-
 8 files changed, 8 insertions(+), 13 deletions(-)

diff --git a/tests/queries/0_stateless/00995_exception_while_insert.sh b/tests/queries/0_stateless/00995_exception_while_insert.sh
index e0cd264a2b7..927ac6a54e5 100755
--- a/tests/queries/0_stateless/00995_exception_while_insert.sh
+++ b/tests/queries/0_stateless/00995_exception_while_insert.sh
@@ -1,11 +1,10 @@
 #!/usr/bin/env bash
 
 CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL=none
 # shellcheck source=../shell_config.sh
 . "$CURDIR"/../shell_config.sh
 
-CLICKHOUSE_CLIENT=$(echo ${CLICKHOUSE_CLIENT} | sed 's/'"--send_logs_level=${CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL}"'/--send_logs_level=none/g')
-
 $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS check;"
 
 $CLICKHOUSE_CLIENT --query="CREATE TABLE check (x UInt64, y UInt64 DEFAULT throwIf(x > 1500000)) ENGINE = Memory;"
diff --git a/tests/queries/0_stateless/01030_limit_by_with_ties_error.sh b/tests/queries/0_stateless/01030_limit_by_with_ties_error.sh
index 711a015f044..c3414838789 100755
--- a/tests/queries/0_stateless/01030_limit_by_with_ties_error.sh
+++ b/tests/queries/0_stateless/01030_limit_by_with_ties_error.sh
@@ -1,11 +1,10 @@
 #!/usr/bin/env bash
 
 CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL=none
 # shellcheck source=../shell_config.sh
 . "$CURDIR"/../shell_config.sh
 
-CLICKHOUSE_CLIENT=$(echo ${CLICKHOUSE_CLIENT} | sed 's/'"--send_logs_level=${CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL}"'/--send_logs_level=none/g')
-
 $CLICKHOUSE_CLIENT --query="""
 	SELECT * FROM (SELECT number % 5 AS a, count() AS b, c FROM numbers(10)
 		ARRAY JOIN [1,2] AS c GROUP BY a,c) AS table
diff --git a/tests/queries/0_stateless/01187_set_profile_as_setting.sh b/tests/queries/0_stateless/01187_set_profile_as_setting.sh
index dacb609d790..fccac57aea8 100755
--- a/tests/queries/0_stateless/01187_set_profile_as_setting.sh
+++ b/tests/queries/0_stateless/01187_set_profile_as_setting.sh
@@ -4,13 +4,13 @@
 unset CLICKHOUSE_LOG_COMMENT
 
 CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL=fatal
 # shellcheck source=../shell_config.sh
 . "$CURDIR"/../shell_config.sh
 
 $CLICKHOUSE_CLIENT -n -m -q "select value, changed from system.settings where name='readonly';"
 $CLICKHOUSE_CLIENT -n -m -q "set profile='default'; select value, changed from system.settings where name='readonly';"
 $CLICKHOUSE_CLIENT -n -m -q "set profile='readonly'; select value, changed from system.settings where name='readonly';" 2>&1| grep -Fa "Cannot modify 'send_logs_level' setting in readonly mode" > /dev/null && echo "OK"
-CLICKHOUSE_CLIENT=$(echo ${CLICKHOUSE_CLIENT} | sed 's/'"--send_logs_level=${CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL}"'/--send_logs_level=fatal/g')
 $CLICKHOUSE_CLIENT -n -m -q "set profile='readonly'; select value, changed from system.settings where name='readonly';"
 
 ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&query=select+value,changed+from+system.settings+where+name='readonly'"
diff --git a/tests/queries/0_stateless/01442_merge_detach_attach_long.sh b/tests/queries/0_stateless/01442_merge_detach_attach_long.sh
index c080dded1c8..acb2550d48c 100755
--- a/tests/queries/0_stateless/01442_merge_detach_attach_long.sh
+++ b/tests/queries/0_stateless/01442_merge_detach_attach_long.sh
@@ -4,11 +4,10 @@
 set -e
 
 CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL=none
 # shellcheck source=../shell_config.sh
 . "$CUR_DIR"/../shell_config.sh
 
-CLICKHOUSE_CLIENT=$(echo ${CLICKHOUSE_CLIENT} | sed 's/'"--send_logs_level=${CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL}"'/--send_logs_level=none/g')
-
 ${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS t"
 ${CLICKHOUSE_CLIENT} --query="CREATE TABLE t (x Int8) ENGINE = MergeTree ORDER BY tuple()"
 
diff --git a/tests/queries/0_stateless/01515_logtrace_function.sh b/tests/queries/0_stateless/01515_logtrace_function.sh
index 131ec0edb9e..4ebecd0cc18 100755
--- a/tests/queries/0_stateless/01515_logtrace_function.sh
+++ b/tests/queries/0_stateless/01515_logtrace_function.sh
@@ -2,9 +2,8 @@
 # Tags: race
 
 CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL=debug
 # shellcheck source=../shell_config.sh
 . "$CURDIR"/../shell_config.sh
 
-CLICKHOUSE_CLIENT=$(echo ${CLICKHOUSE_CLIENT} | sed 's/'"--send_logs_level=${CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL}"'/--send_logs_level=debug/g')
-
 ${CLICKHOUSE_CLIENT} --query="SELECT logTrace('logTrace Function Test');" 2>&1 | grep -q "logTrace Function Test" && echo "OK" || echo "FAIL"
diff --git a/tests/queries/0_stateless/01583_parallel_parsing_exception_with_offset.sh b/tests/queries/0_stateless/01583_parallel_parsing_exception_with_offset.sh
index aa3a25096c0..00d22cb8e83 100755
--- a/tests/queries/0_stateless/01583_parallel_parsing_exception_with_offset.sh
+++ b/tests/queries/0_stateless/01583_parallel_parsing_exception_with_offset.sh
@@ -1,11 +1,10 @@
 #!/usr/bin/env bash
 
 CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL=none
 # shellcheck source=../shell_config.sh
 . "$CURDIR"/../shell_config.sh
 
-CLICKHOUSE_CLIENT=$(echo ${CLICKHOUSE_CLIENT} | sed 's/'"--send_logs_level=${CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL}"'/--send_logs_level=none/g')
-
 $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS check;"
 
 $CLICKHOUSE_CLIENT --query="CREATE TABLE check (x UInt64) ENGINE = Memory;"
diff --git a/tests/queries/0_stateless/02359_send_logs_source_regexp.sh b/tests/queries/0_stateless/02359_send_logs_source_regexp.sh
index d3b60bc59f4..f287e323ca7 100755
--- a/tests/queries/0_stateless/02359_send_logs_source_regexp.sh
+++ b/tests/queries/0_stateless/02359_send_logs_source_regexp.sh
@@ -1,11 +1,11 @@
 #!/usr/bin/env bash
 
 CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL=trace
 # shellcheck source=../shell_config.sh
 . "$CURDIR"/../shell_config.sh
 
 [ ! -z "$CLICKHOUSE_CLIENT_REDEFINED" ] && CLICKHOUSE_CLIENT=$CLICKHOUSE_CLIENT_REDEFINED
 
-CLICKHOUSE_CLIENT=$(echo ${CLICKHOUSE_CLIENT} | sed 's/'"--send_logs_level=${CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL}"'/--send_logs_level=trace/g')
 regexp="executeQuery|InterpreterSelectQuery"
 $CLICKHOUSE_CLIENT --send_logs_source_regexp "$regexp" -q "SELECT 1;" 2> >(grep -v -E "$regexp" 1>&2)
diff --git a/tests/queries/0_stateless/02360_rename_table_along_with_log_name.sh b/tests/queries/0_stateless/02360_rename_table_along_with_log_name.sh
index e8c7f844b5c..c07dcdd549b 100755
--- a/tests/queries/0_stateless/02360_rename_table_along_with_log_name.sh
+++ b/tests/queries/0_stateless/02360_rename_table_along_with_log_name.sh
@@ -1,6 +1,7 @@
 #!/usr/bin/env bash
 
 CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL=trace
 # shellcheck source=../shell_config.sh
 . "$CURDIR"/../shell_config.sh
 
@@ -11,7 +12,6 @@ $CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS y;"
 $CLICKHOUSE_CLIENT -q "CREATE TABLE x(i int) ENGINE MergeTree ORDER BY i;"
 $CLICKHOUSE_CLIENT -q "RENAME TABLE x TO y;"
 
-CLICKHOUSE_CLIENT_WITH_LOG=$(echo ${CLICKHOUSE_CLIENT} | sed 's/'"--send_logs_level=${CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL}"'/--send_logs_level=trace/g')
 regexp="${CLICKHOUSE_DATABASE}\\.x" # Check if there are still log entries with old table name
-$CLICKHOUSE_CLIENT_WITH_LOG --send_logs_source_regexp "$regexp" -q "INSERT INTO y VALUES(1);"
+$CLICKHOUSE_CLIENT --send_logs_source_regexp "$regexp" -q "INSERT INTO y VALUES(1);"
 

From 39d0b309bd730748b52acfb32de729e8f8496f83 Mon Sep 17 00:00:00 2001
From: Dmitry Kardymon <d.kardymon@arenadata.io>
Date: Fri, 7 Jul 2023 13:15:26 +0000
Subject: [PATCH 055/242] Make own function with slices

---
 src/Functions/HasSubsequenceImpl.h            | 187 ++++++++----------
 src/Functions/hasSubsequence.cpp              |   2 +-
 .../hasSubsequenceCaseInsensitive.cpp         |   2 +-
 .../hasSubsequenceCaseInsensitiveUTF8.cpp     |   2 +-
 src/Functions/hasSubsequenceUTF8.cpp          |   2 +-
 5 files changed, 84 insertions(+), 111 deletions(-)

diff --git a/src/Functions/HasSubsequenceImpl.h b/src/Functions/HasSubsequenceImpl.h
index bcb8e8e99e6..1396e64ade5 100644
--- a/src/Functions/HasSubsequenceImpl.h
+++ b/src/Functions/HasSubsequenceImpl.h
@@ -1,124 +1,109 @@
 #pragma once
+
+#include <Columns/ColumnString.h>
+#include <Functions/GatherUtils/Sources.h>
+#include <Functions/GatherUtils/Sinks.h>
+#include <Functions/GatherUtils/Algorithms.h>
+#include <Functions/GatherUtils/Sinks.h>
 namespace DB
 {
 namespace
 {
-template <typename Name, typename Impl>
-struct HasSubsequenceImpl
-{
-    using ResultType = UInt8;
 
-    static constexpr bool use_default_implementation_for_constants = false;
-    static constexpr bool supports_start_pos = false;
+using namespace GatherUtils;
+
+template <typename Name, typename Impl>
+class FunctionsHasSubsequenceImpl : public IFunction
+{
+public:
     static constexpr auto name = Name::name;
 
-    static ColumnNumbers getArgumentsThatAreAlwaysConstant() { return {};}
+    static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionsHasSubsequenceImpl>(); }
 
-    static void vectorConstant(
-        const ColumnString::Chars & haystack_data,
-        const ColumnString::Offsets & haystack_offsets,
-        const String & needle,
-        const ColumnPtr & /*start_pos*/,
-        PaddedPODArray<UInt8> & res,
-        [[maybe_unused]] ColumnUInt8 * /*res_null*/)
+    String getName() const override { return name; }
+
+    bool isVariadic() const override { return false; }
+
+    bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
+
+    size_t getNumberOfArguments() const override { return 2; }
+
+    bool useDefaultImplementationForConstants() const override { return false; }
+
+    ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {};}
+
+    DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
     {
-        if (needle.empty())
-        {
-            for (auto & r : res)
-                r = 1;
-            return;
-        }
+        if (!isString(arguments[0]))
+            throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
+                "Illegal type {} of argument of function {}",
+                arguments[0]->getName(), getName());
 
-        ColumnString::Offset prev_haystack_offset = 0;
-        for (size_t i = 0; i < haystack_offsets.size(); ++i)
-        {
-            size_t haystack_size = haystack_offsets[i] - prev_haystack_offset - 1;
-            const char * haystack = reinterpret_cast<const char *>(&haystack_data[prev_haystack_offset]);
-            res[i] = hasSubsequence(haystack, haystack_size, needle.c_str(), needle.size());
-            prev_haystack_offset = haystack_offsets[i];
-        }
+        if (!isString(arguments[1]))
+            throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
+                "Illegal type {} of argument of function {}",
+                arguments[1]->getName(), getName());
+
+        return std::make_shared<DataTypeNumber<UInt8>>();
     }
 
-    static void vectorVector(
-        const ColumnString::Chars & haystack_data,
-        const ColumnString::Offsets & haystack_offsets,
-        const ColumnString::Chars & needle_data,
-        const ColumnString::Offsets & needle_offsets,
-        const ColumnPtr & /*start_pos*/,
-        PaddedPODArray<UInt8> & res,
-        ColumnUInt8 * /*res_null*/)
+    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & /*result_type*/, size_t input_rows_count) const override
     {
-        ColumnString::Offset prev_haystack_offset = 0;
-        ColumnString::Offset prev_needle_offset = 0;
+        const ColumnPtr & column_haystack = arguments[0].column;
+        const ColumnPtr & column_needle = arguments[1].column;
 
-        size_t size = haystack_offsets.size();
+        const ColumnConst * haystack_const_string = checkAndGetColumnConst<ColumnString>(column_haystack.get());
+        const ColumnConst * needle_const_string = checkAndGetColumnConst<ColumnString>(column_needle.get());
+        const ColumnString * haystack_string = checkAndGetColumn<ColumnString>(&*column_haystack);
+        const ColumnString * needle_string = checkAndGetColumn<ColumnString>(&*column_needle);
 
-        for (size_t i = 0; i < size; ++i)
-        {
-            size_t needle_size = needle_offsets[i] - prev_needle_offset - 1;
-            size_t haystack_size = haystack_offsets[i] - prev_haystack_offset - 1;
+        auto col_res = ColumnVector<UInt8>::create();
+        typename ColumnVector<UInt8>::Container & vec_res = col_res->getData();
+        vec_res.resize(input_rows_count);
 
-            if (0 == needle_size)
-            {
-                res[i] = 1;
-            }
-            else
-            {
-                const char * needle = reinterpret_cast<const char *>(&needle_data[prev_needle_offset]);
-                const char * haystack = reinterpret_cast<const char *>(&haystack_data[prev_haystack_offset]);
-                res[i] = hasSubsequence(haystack, haystack_size, needle, needle_size);
-            }
+        if (haystack_string && needle_string)
+            execute(StringSource{*haystack_string}, StringSource{*needle_string}, vec_res);
+        else if (haystack_string && needle_const_string)
+            execute(StringSource{*haystack_string}, ConstSource<StringSource>{*needle_const_string}, vec_res);
+        else if (haystack_const_string && needle_string)
+            execute(ConstSource<StringSource>{*haystack_const_string}, StringSource{*needle_string}, vec_res);
+        else if (haystack_const_string && needle_const_string)
+            execute(ConstSource<StringSource>{*haystack_const_string}, ConstSource<StringSource>{*needle_const_string}, vec_res);
+        else
+            throw Exception(
+                ErrorCodes::ILLEGAL_COLUMN,
+                "Illegal column {}, first argument of function {} must be a string",
+                arguments[0].column->getName(),
+                getName());
 
-            prev_haystack_offset = haystack_offsets[i];
-            prev_needle_offset = needle_offsets[i];
-        }
+        return col_res;
     }
 
-    static void constantVector(
-        const String & haystack,
-        const ColumnString::Chars & needle_data,
-        const ColumnString::Offsets & needle_offsets,
-        const ColumnPtr & /*start_pos*/,
-        PaddedPODArray<UInt8> & res,
-        ColumnUInt8 * /*res_null*/)
+private:
+
+    template <typename SourceHaystack, typename SourceNeedle>
+    void execute(
+        SourceHaystack && haystacks,
+        SourceNeedle && needles,
+        PaddedPODArray<UInt8> & res_data) const
     {
-        ColumnString::Offset prev_needle_offset = 0;
+        size_t row_num = 0;
 
-        size_t size = needle_offsets.size();
-
-        for (size_t i = 0; i < size; ++i)
+        while (!haystacks.isEnd())
         {
-            size_t needle_size = needle_offsets[i] - prev_needle_offset - 1;
+            [[maybe_unused]] auto haystack_slice = haystacks.getWhole();
+            [[maybe_unused]] auto needle_slice = needles.getWhole();
 
-            if (0 == needle_size)
-            {
-                res[i] = 1;
-            }
-            else
-            {
-                const char * needle = reinterpret_cast<const char *>(&needle_data[prev_needle_offset]);
-                res[i] = hasSubsequence(haystack.c_str(), haystack.size(), needle, needle_size);
-            }
-            prev_needle_offset = needle_offsets[i];
-        }
-    }
+            auto haystack = std::string(reinterpret_cast<const char *>(haystack_slice.data), haystack_slice.size);
+            auto needle = std::string(reinterpret_cast<const char *>(needle_slice.data), needle_slice.size);
 
-    static void constantConstant(
-        String haystack,
-        String needle,
-        const ColumnPtr & /*start_pos*/,
-        PaddedPODArray<UInt8> & res,
-        ColumnUInt8 * /*res_null*/)
-    {
-        size_t size = res.size();
-        Impl::toLowerIfNeed(haystack);
-        Impl::toLowerIfNeed(needle);
+            Impl::toLowerIfNeed(haystack);
+            Impl::toLowerIfNeed(needle);
 
-        UInt8 result = hasSubsequence(haystack.c_str(), haystack.size(), needle.c_str(), needle.size());
-
-        for (size_t i = 0; i < size; ++i)
-        {
-            res[i] = result;
+            res_data[row_num] = hasSubsequence(haystack.c_str(), haystack.size(), needle.c_str(), needle.size());
+            haystacks.next();
+            needles.next();
+            ++row_num;
         }
     }
 
@@ -130,18 +115,6 @@ struct HasSubsequenceImpl
                 ++j;
         return j == needle_size;
     }
-
-    template <typename... Args>
-    static void vectorFixedConstant(Args &&...)
-    {
-        throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Function '{}' doesn't support FixedString haystack argument", name);
-    }
-
-    template <typename... Args>
-    static void vectorFixedVector(Args &&...)
-    {
-        throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Function '{}' doesn't support FixedString haystack argument", name);
-    }
 };
 
 }
diff --git a/src/Functions/hasSubsequence.cpp b/src/Functions/hasSubsequence.cpp
index bb1f295cee4..900e80f5524 100644
--- a/src/Functions/hasSubsequence.cpp
+++ b/src/Functions/hasSubsequence.cpp
@@ -18,7 +18,7 @@ struct NameHasSubsequence
     static constexpr auto name = "hasSubsequence";
 };
 
-using FunctionHasSubsequence = FunctionsStringSearch<HasSubsequenceImpl<NameHasSubsequence, HasSubsequenceCaseSensitiveASCII>>;
+using FunctionHasSubsequence = FunctionsHasSubsequenceImpl<NameHasSubsequence, HasSubsequenceCaseSensitiveASCII>;
 }
 
 REGISTER_FUNCTION(hasSubsequence)
diff --git a/src/Functions/hasSubsequenceCaseInsensitive.cpp b/src/Functions/hasSubsequenceCaseInsensitive.cpp
index fe50ada9be9..dbac62d7f09 100644
--- a/src/Functions/hasSubsequenceCaseInsensitive.cpp
+++ b/src/Functions/hasSubsequenceCaseInsensitive.cpp
@@ -17,7 +17,7 @@ struct NameHasSubsequenceCaseInsensitive
     static constexpr auto name = "hasSubsequenceCaseInsensitive";
 };
 
-using FunctionHasSubsequenceCaseInsensitive = FunctionsStringSearch<HasSubsequenceImpl<NameHasSubsequenceCaseInsensitive, HasSubsequenceCaseInsensitiveASCII>>;
+using FunctionHasSubsequenceCaseInsensitive = FunctionsHasSubsequenceImpl<NameHasSubsequenceCaseInsensitive, HasSubsequenceCaseInsensitiveASCII>;
 }
 
 REGISTER_FUNCTION(hasSubsequenceCaseInsensitive)
diff --git a/src/Functions/hasSubsequenceCaseInsensitiveUTF8.cpp b/src/Functions/hasSubsequenceCaseInsensitiveUTF8.cpp
index 2908c284a25..c104ff52857 100644
--- a/src/Functions/hasSubsequenceCaseInsensitiveUTF8.cpp
+++ b/src/Functions/hasSubsequenceCaseInsensitiveUTF8.cpp
@@ -17,7 +17,7 @@ struct NameHasSubsequenceCaseInsensitiveUTF8
     static constexpr auto name = "hasSubsequenceCaseInsensitiveUTF8";
 };
 
-using FunctionHasSubsequenceCaseInsensitiveUTF8 = FunctionsStringSearch<HasSubsequenceImpl<NameHasSubsequenceCaseInsensitiveUTF8, HasSubsequenceCaseInsensitiveUTF8>>;
+using FunctionHasSubsequenceCaseInsensitiveUTF8 = FunctionsHasSubsequenceImpl<NameHasSubsequenceCaseInsensitiveUTF8, HasSubsequenceCaseInsensitiveUTF8>;
 }
 
 REGISTER_FUNCTION(hasSubsequenceCaseInsensitiveUTF8)
diff --git a/src/Functions/hasSubsequenceUTF8.cpp b/src/Functions/hasSubsequenceUTF8.cpp
index c0811de6575..c67ce7d9c74 100644
--- a/src/Functions/hasSubsequenceUTF8.cpp
+++ b/src/Functions/hasSubsequenceUTF8.cpp
@@ -18,7 +18,7 @@ struct NameHasSubsequenceUTF8
     static constexpr auto name = "hasSubsequenceUTF8";
 };
 
-using FunctionHasSubsequenceUTF8 = FunctionsStringSearch<HasSubsequenceImpl<NameHasSubsequenceUTF8, HasSubsequenceCaseSensitiveUTF8>>;
+using FunctionHasSubsequenceUTF8 = FunctionsHasSubsequenceImpl<NameHasSubsequenceUTF8, HasSubsequenceCaseSensitiveUTF8>;
 }
 
 REGISTER_FUNCTION(hasSubsequenceUTF8)

From 8e4c8f118cf64fcd77524439508b838c05a58fcf Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@clickhouse.com>
Date: Sat, 8 Jul 2023 09:07:05 +0200
Subject: [PATCH 056/242] Fix disaster in integration tests, part 2

---
 tests/integration/ci-runner.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tests/integration/ci-runner.py b/tests/integration/ci-runner.py
index d6d17abe725..43184574e6e 100755
--- a/tests/integration/ci-runner.py
+++ b/tests/integration/ci-runner.py
@@ -406,9 +406,9 @@ class ClickhouseIntegrationTestsRunner:
         out_file_full = os.path.join(self.result_path, "runner_get_all_tests.log")
         cmd = (
             "cd {repo_path}/tests/integration && "
-            "timeout -s 9 1h ./runner {runner_opts} {image_cmd} ' --setup-plan' "
-            "| tee {out_file_full} | grep '::' | sed 's/ (fixtures used:.*//g' | sed 's/^ *//g' | sed 's/ *$//g' "
-            "| grep -v 'SKIPPED' | sort -u  > {out_file}".format(
+            "timeout --signal=KILL 1h ./runner {runner_opts} {image_cmd} ' --setup-plan' "
+            "| tee '{out_file_full}' | grep -F '::' | sed -r 's/ \(fixtures used:.*//g; s/^ *//g; s/ *$//g' "
+            "| grep -v -F 'SKIPPED' | sort --unique > {out_file}".format(
                 repo_path=repo_path,
                 runner_opts=self._get_runner_opts(),
                 image_cmd=image_cmd,
@@ -626,7 +626,7 @@ class ClickhouseIntegrationTestsRunner:
             info_basename = test_group_str + "_" + str(i) + ".nfo"
             info_path = os.path.join(repo_path, "tests/integration", info_basename)
 
-            test_cmd = " ".join([test for test in sorted(test_names)])
+            test_cmd = " ".join([f"'{test}'" for test in sorted(test_names)])
             parallel_cmd = (
                 " --parallel {} ".format(num_workers) if num_workers > 0 else ""
             )

From 62bfa4ed93fb3796eccb0df041a9dfa057583c9b Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@clickhouse.com>
Date: Sun, 9 Jul 2023 02:21:48 +0200
Subject: [PATCH 057/242] Fix performance test for regexp cache

---
 src/Functions/Regexps.h                 | 4 +++-
 tests/performance/re2_regex_caching.xml | 4 ++--
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/src/Functions/Regexps.h b/src/Functions/Regexps.h
index 4bfd10bdbf5..aa8ae5b4054 100644
--- a/src/Functions/Regexps.h
+++ b/src/Functions/Regexps.h
@@ -11,6 +11,7 @@
 #include <Common/OptimizedRegularExpression.h>
 #include <Common/ProfileEvents.h>
 #include <Common/likePatternToRegexp.h>
+#include <Common/HashTable/Hash.h>
 #include <base/defines.h>
 #include <base/StringRef.h>
 #include <boost/container_hash/hash.hpp>
@@ -21,6 +22,7 @@
 #    include <hs.h>
 #endif
 
+
 namespace ProfileEvents
 {
 extern const Event RegexpCreated;
@@ -86,7 +88,7 @@ public:
 private:
     constexpr static size_t CACHE_SIZE = 100; /// collision probability
 
-    std::hash<String> hasher;
+    DefaultHash<String> hasher;
     struct Bucket
     {
         String pattern;   /// key
diff --git a/tests/performance/re2_regex_caching.xml b/tests/performance/re2_regex_caching.xml
index 6edc83097ba..9778a8d4c0c 100644
--- a/tests/performance/re2_regex_caching.xml
+++ b/tests/performance/re2_regex_caching.xml
@@ -24,8 +24,8 @@
                 <value>'.*' || toString(number) || '.'</value>
                 <!-- simple patterns, low distinctness (10 patterns) -->
                 <value>'.*' || toString(number % 10) || '.'</value>
-                <!-- complex patterns, all unique -->
-                <value>'([a-zA-Z][a-zA-Z0-9]*)://([^ /]+)(/[^ ]*)?([^ @]+)@([^ @]+)([0-9][0-9]?)/([0-9][0-9]?)/([0-9][0-9]([0-9][0-9])?)(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9])\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9])' || toString(number)</value>
+                <!-- complex patterns, all unique - this is very slow (from 2 to 15 seconds) -->
+                <!-- <value>'([a-zA-Z][a-zA-Z0-9]*)://([^ /]+)(/[^ ]*)?([^ @]+)@([^ @]+)([0-9][0-9]?)/([0-9][0-9]?)/([0-9][0-9]([0-9][0-9])?)(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9])\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9])' || toString(number)</value> -->
                 <!-- complex patterns, low distinctness -->
                 <value>'([a-zA-Z][a-zA-Z0-9]*)://([^ /]+)(/[^ ]*)?([^ @]+)@([^ @]+)([0-9][0-9]?)/([0-9][0-9]?)/([0-9][0-9]([0-9][0-9])?)(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9])\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9])' || toString(number % 10)</value>
                 <!-- Note: for this benchmark, we are only interested in compilation time, not correctness, evaluation time or the result.

From 1f72b1bb09a2ca0c22fab5d8cb740e4be87af1a6 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@clickhouse.com>
Date: Sun, 9 Jul 2023 03:28:12 +0300
Subject: [PATCH 058/242] Update ci-runner.py

---
 tests/integration/ci-runner.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tests/integration/ci-runner.py b/tests/integration/ci-runner.py
index 43184574e6e..efee74800c6 100755
--- a/tests/integration/ci-runner.py
+++ b/tests/integration/ci-runner.py
@@ -168,7 +168,7 @@ def clear_ip_tables_and_restart_daemons():
     try:
         logging.info("Killing all alive docker containers")
         subprocess.check_output(
-            "timeout -s 9 10m docker ps --quiet | xargs --no-run-if-empty docker kill",
+            "timeout --signal=KILL 10m docker ps --quiet | xargs --no-run-if-empty docker kill",
             shell=True,
         )
     except subprocess.CalledProcessError as err:
@@ -177,7 +177,7 @@ def clear_ip_tables_and_restart_daemons():
     try:
         logging.info("Removing all docker containers")
         subprocess.check_output(
-            "timeout -s 9 10m docker ps --all --quiet | xargs --no-run-if-empty docker rm --force",
+            "timeout --signal=KILL 10m docker ps --all --quiet | xargs --no-run-if-empty docker rm --force",
             shell=True,
         )
     except subprocess.CalledProcessError as err:
@@ -297,7 +297,7 @@ class ClickhouseIntegrationTestsRunner:
 
         cmd = (
             "cd {repo_path}/tests/integration && "
-            "timeout -s 9 1h ./runner {runner_opts} {image_cmd} --pre-pull --command '{command}' ".format(
+            "timeout --signal=KILL 1h ./runner {runner_opts} {image_cmd} --pre-pull --command '{command}' ".format(
                 repo_path=repo_path,
                 runner_opts=self._get_runner_opts(),
                 image_cmd=image_cmd,
@@ -635,7 +635,7 @@ class ClickhouseIntegrationTestsRunner:
             # -E -- (E)rror
             # -p -- (p)assed
             # -s -- (s)kipped
-            cmd = "cd {}/tests/integration && timeout -s 9 1h ./runner {} {} -t {} {} '-rfEps --run-id={} --color=no --durations=0 {}' | tee {}".format(
+            cmd = "cd {}/tests/integration && timeout --signal=KILL 1h ./runner {} {} -t {} {} '-rfEps --run-id={} --color=no --durations=0 {}' | tee {}".format(
                 repo_path,
                 self._get_runner_opts(),
                 image_cmd,

From 847461160965a18d33a2ce792a0430fd05323ba3 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@clickhouse.com>
Date: Sun, 9 Jul 2023 06:51:42 +0200
Subject: [PATCH 059/242] Partial revert

---
 src/Functions/Regexps.h | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/src/Functions/Regexps.h b/src/Functions/Regexps.h
index aa8ae5b4054..4bfd10bdbf5 100644
--- a/src/Functions/Regexps.h
+++ b/src/Functions/Regexps.h
@@ -11,7 +11,6 @@
 #include <Common/OptimizedRegularExpression.h>
 #include <Common/ProfileEvents.h>
 #include <Common/likePatternToRegexp.h>
-#include <Common/HashTable/Hash.h>
 #include <base/defines.h>
 #include <base/StringRef.h>
 #include <boost/container_hash/hash.hpp>
@@ -22,7 +21,6 @@
 #    include <hs.h>
 #endif
 
-
 namespace ProfileEvents
 {
 extern const Event RegexpCreated;
@@ -88,7 +86,7 @@ public:
 private:
     constexpr static size_t CACHE_SIZE = 100; /// collision probability
 
-    DefaultHash<String> hasher;
+    std::hash<String> hasher;
     struct Bucket
     {
         String pattern;   /// key

From ff4ec823424dfb7067b1f3db714c61a6a10869ee Mon Sep 17 00:00:00 2001
From: Dmitry Kardymon <d.kardymon@arenadata.io>
Date: Mon, 10 Jul 2023 07:29:31 +0000
Subject: [PATCH 060/242] Added hasSubsequenceUTF8 impl + tests

---
 src/Functions/HasSubsequenceImpl.h            | 56 +++++++++++++++++--
 src/Functions/hasSubsequence.cpp              |  2 +
 .../hasSubsequenceCaseInsensitive.cpp         |  2 +
 .../hasSubsequenceCaseInsensitiveUTF8.cpp     |  4 +-
 src/Functions/hasSubsequenceUTF8.cpp          |  4 +-
 .../02809_has_subsequence.reference           | 22 ++++++++
 .../0_stateless/02809_has_subsequence.sql     | 24 ++++++++
 7 files changed, 108 insertions(+), 6 deletions(-)

diff --git a/src/Functions/HasSubsequenceImpl.h b/src/Functions/HasSubsequenceImpl.h
index 1396e64ade5..10fe6215280 100644
--- a/src/Functions/HasSubsequenceImpl.h
+++ b/src/Functions/HasSubsequenceImpl.h
@@ -96,11 +96,18 @@ private:
 
             auto haystack = std::string(reinterpret_cast<const char *>(haystack_slice.data), haystack_slice.size);
             auto needle = std::string(reinterpret_cast<const char *>(needle_slice.data), needle_slice.size);
+            
+            if constexpr (!Impl::is_utf8)
+            {
+                Impl::toLowerIfNeed(haystack);
+                Impl::toLowerIfNeed(needle);
 
-            Impl::toLowerIfNeed(haystack);
-            Impl::toLowerIfNeed(needle);
-
-            res_data[row_num] = hasSubsequence(haystack.c_str(), haystack.size(), needle.c_str(), needle.size());
+                res_data[row_num] = hasSubsequence(haystack.c_str(), haystack.size(), needle.c_str(), needle.size());
+            }
+            else
+            {
+                res_data[row_num] = hasSubsequenceUTF8(haystack.c_str(), haystack.size(), needle.c_str(), needle.size());
+            }
             haystacks.next();
             needles.next();
             ++row_num;
@@ -115,6 +122,47 @@ private:
                 ++j;
         return j == needle_size;
     }
+
+    /// Returns 1 when the code points of `needle` appear in `haystack` in the same order
+    /// (not necessarily adjacent), 0 otherwise. An empty needle always matches.
+    static UInt8 hasSubsequenceUTF8(const char * haystack, size_t haystack_size, const char * needle, size_t needle_size)
+    {
+        const auto * haystack_pos = haystack;
+        const auto * needle_pos = needle;
+        const auto * haystack_end = haystack + haystack_size;
+        const auto * needle_end = needle + needle_size;
+
+        if (!needle_size)
+            return 1;
+
+        auto haystack_code_point = UTF8::convertUTF8ToCodePoint(haystack_pos, haystack_end - haystack_pos);
+        auto needle_code_point = UTF8::convertUTF8ToCodePoint(needle_pos, needle_end - needle_pos);
+        if (!haystack_code_point || !needle_code_point)
+            return 0;
+
+        /// Re-check both decode results on every iteration so that a sequence which fails to decode
+        /// later in either string ends the scan (it can never contribute to a match).
+        while (haystack_code_point && needle_code_point)
+        {
+            if (needle_code_point == haystack_code_point)
+            {
+                needle_pos += UTF8::seqLength(*needle_pos);
+                if (needle_pos >= needle_end)
+                    break;
+                needle_code_point = UTF8::convertUTF8ToCodePoint(needle_pos, needle_end - needle_pos);
+            }
+
+            /// `>=`, not `==`: seqLength() is given only the first byte, so the step it returns may
+            /// exceed the bytes that remain; the position must not be used once it is past the end.
+            haystack_pos += UTF8::seqLength(*haystack_pos);
+            if (haystack_pos >= haystack_end)
+                break;
+            haystack_code_point = UTF8::convertUTF8ToCodePoint(haystack_pos, haystack_end - haystack_pos);
+        }
+
+        /// needle_pos only advances on a match, so reaching the end means every needle code point was found.
+        return needle_pos >= needle_end;
+    }
 };
 
 }
diff --git a/src/Functions/hasSubsequence.cpp b/src/Functions/hasSubsequence.cpp
index 900e80f5524..acc574c8207 100644
--- a/src/Functions/hasSubsequence.cpp
+++ b/src/Functions/hasSubsequence.cpp
@@ -10,6 +10,8 @@ namespace
 
 struct HasSubsequenceCaseSensitiveASCII
 {
+    static constexpr bool is_utf8 = false;
+
     static void toLowerIfNeed(String & /*s*/) { }
 };
 
diff --git a/src/Functions/hasSubsequenceCaseInsensitive.cpp b/src/Functions/hasSubsequenceCaseInsensitive.cpp
index dbac62d7f09..68c510794c3 100644
--- a/src/Functions/hasSubsequenceCaseInsensitive.cpp
+++ b/src/Functions/hasSubsequenceCaseInsensitive.cpp
@@ -9,6 +9,8 @@ namespace
 
 struct HasSubsequenceCaseInsensitiveASCII
 {
+    static constexpr bool is_utf8 = false;
+
     static void toLowerIfNeed(String & s) { std::transform(std::begin(s), std::end(s), std::begin(s), tolower); }
 };
 
diff --git a/src/Functions/hasSubsequenceCaseInsensitiveUTF8.cpp b/src/Functions/hasSubsequenceCaseInsensitiveUTF8.cpp
index c104ff52857..d1fb2f5152a 100644
--- a/src/Functions/hasSubsequenceCaseInsensitiveUTF8.cpp
+++ b/src/Functions/hasSubsequenceCaseInsensitiveUTF8.cpp
@@ -9,7 +9,9 @@ namespace
 
 struct HasSubsequenceCaseInsensitiveUTF8
 {
-    static void toLowerIfNeed(String & s) { std::transform(std::begin(s), std::end(s), std::begin(s), tolower); }
+    static constexpr bool is_utf8 = true;
+
+    //static void toLowerIfNeed(String & s) { std::transform(std::begin(s), std::end(s), std::begin(s), tolower); }
 };
 
 struct NameHasSubsequenceCaseInsensitiveUTF8
diff --git a/src/Functions/hasSubsequenceUTF8.cpp b/src/Functions/hasSubsequenceUTF8.cpp
index c67ce7d9c74..fcdcd28e02b 100644
--- a/src/Functions/hasSubsequenceUTF8.cpp
+++ b/src/Functions/hasSubsequenceUTF8.cpp
@@ -10,7 +10,9 @@ namespace
 
 struct HasSubsequenceCaseSensitiveUTF8
 {
-    static void toLowerIfNeed(String & /*s*/) { }
+    static constexpr bool is_utf8 = true;
+
+   // static void toLowerIfNeed(String & /*s*/) { }
 };
 
 struct NameHasSubsequenceUTF8
diff --git a/tests/queries/0_stateless/02809_has_subsequence.reference b/tests/queries/0_stateless/02809_has_subsequence.reference
index d12c0ba9fb3..8437a7f4b74 100644
--- a/tests/queries/0_stateless/02809_has_subsequence.reference
+++ b/tests/queries/0_stateless/02809_has_subsequence.reference
@@ -1,6 +1,7 @@
 hasSubsequence / const / const
 1
 1
+0
 1
 1
 1
@@ -24,4 +25,25 @@ hasSubsequence / string / string
 1
 0
 hasSubsequenceCaseInsensitive / const / const
+0
 1
+1
+hasSubsequenceCaseInsensitive / string / string
+0
+1
+1
+hasSubsequenceUTF8 / const / const
+1
+1
+0
+1
+0
+1
+0
+1
+1
+0
+1
+0
+1
+0
diff --git a/tests/queries/0_stateless/02809_has_subsequence.sql b/tests/queries/0_stateless/02809_has_subsequence.sql
index 64f3fd8dc77..b8d3280488c 100644
--- a/tests/queries/0_stateless/02809_has_subsequence.sql
+++ b/tests/queries/0_stateless/02809_has_subsequence.sql
@@ -1,6 +1,7 @@
 select 'hasSubsequence / const / const';
 select hasSubsequence('garbage', '');
 select hasSubsequence('garbage', 'g');
+select hasSubsequence('garbage', 'G');
 select hasSubsequence('garbage', 'a');
 select hasSubsequence('garbage', 'e');
 select hasSubsequence('garbage', 'gr');
@@ -31,5 +32,28 @@ select hasSubsequence(materialize('garbage'), materialize('garbage1'));
 
 select 'hasSubsequenceCaseInsensitive / const / const';
 
+select hasSubsequenceCaseInsensitive('garbage', 'w');
 select hasSubsequenceCaseInsensitive('garbage', 'ARG');
+select hasSubsequenceCaseInsensitive('GARGAGE', 'arg');
 
+select 'hasSubsequenceCaseInsensitive / string / string';
+select hasSubsequenceCaseInsensitive(materialize('garbage'), materialize('w'));
+select hasSubsequenceCaseInsensitive(materialize('garbage'), materialize('ARG'));
+select hasSubsequenceCaseInsensitive(materialize('GARGAGE'), materialize('arg'));
+
+select 'hasSubsequenceUTF8 / const / const';
+select hasSubsequence('ClickHouse - столбцовая система управления базами данных', '');
+select hasSubsequence('ClickHouse - столбцовая система управления базами данных', 'C');     -- eng
+select hasSubsequence('ClickHouse - столбцовая система управления базами данных', 'С');     -- cyrilic
+select hasSubsequence('ClickHouse - столбцовая система управления базами данных', 'House');
+select hasSubsequence('ClickHouse - столбцовая система управления базами данных', 'house');
+select hasSubsequence('ClickHouse - столбцовая система управления базами данных', 'система');
+select hasSubsequence('ClickHouse - столбцовая система управления базами данных', 'Система');
+select hasSubsequence('ClickHouse - столбцовая система управления базами данных', 'ссубд');
+
+select hasSubsequence(materialize('ClickHouse - столбцовая система управления базами данных'), 'субд');
+select hasSubsequence(materialize('ClickHouse - столбцовая система управления базами данных'), 'суббд');
+select hasSubsequence('ClickHouse - столбцовая система управления базами данных', materialize('стул'));
+select hasSubsequence('ClickHouse - столбцовая система управления базами данных', materialize('два стула'));
+select hasSubsequence(materialize('ClickHouse - столбцовая система управления базами данных'), materialize('орех'));
+select hasSubsequence(materialize('ClickHouse - столбцовая система управления базами данных'), materialize('два ореха'));
\ No newline at end of file

From 17891ca1ebb198785d9f8de7bfcef4203bc37d9e Mon Sep 17 00:00:00 2001
From: Dmitry Kardymon <d.kardymon@arenadata.io>
Date: Mon, 10 Jul 2023 09:18:09 +0000
Subject: [PATCH 061/242] Add case-insensitive UTF-8 implementation and tests for hasSubsequence

---
 .../functions/string-search-functions.md      |  2 +-
 .../functions/string-search-functions.md      |  2 +-
 src/Functions/HasSubsequenceImpl.h            | 66 ++++++++-----------
 src/Functions/hasSubsequence.cpp              |  5 +-
 .../hasSubsequenceCaseInsensitive.cpp         |  5 +-
 .../hasSubsequenceCaseInsensitiveUTF8.cpp     |  7 +-
 src/Functions/hasSubsequenceUTF8.cpp          |  5 +-
 .../02809_has_subsequence.reference           | 20 ++++--
 .../0_stateless/02809_has_subsequence.sql     | 31 +++++----
 .../aspell-ignore/en/aspell-dict.txt          |  5 ++
 10 files changed, 71 insertions(+), 77 deletions(-)

diff --git a/docs/en/sql-reference/functions/string-search-functions.md b/docs/en/sql-reference/functions/string-search-functions.md
index 04ad6474310..c10a1036677 100644
--- a/docs/en/sql-reference/functions/string-search-functions.md
+++ b/docs/en/sql-reference/functions/string-search-functions.md
@@ -647,7 +647,7 @@ hasSubsequence(haystack, needle)
 **Arguments**
 
 - `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
-- `needle` — Substring to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal).
+- `needle` — Subsequence to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal).
 
 **Returned values**
 
diff --git a/docs/ru/sql-reference/functions/string-search-functions.md b/docs/ru/sql-reference/functions/string-search-functions.md
index 21989e882b6..6e3830869cd 100644
--- a/docs/ru/sql-reference/functions/string-search-functions.md
+++ b/docs/ru/sql-reference/functions/string-search-functions.md
@@ -816,7 +816,7 @@ hasSubsequence(haystack, needle)
 **Аргументы**
 
 -   `haystack` — строка, по которой выполняется поиск. [Строка](../syntax.md#syntax-string-literal).
--   `needle` — подстрока, которую необходимо найти. [Строка](../syntax.md#syntax-string-literal).
+-   `needle` — подпоследовательность, которую необходимо найти. [Строка](../syntax.md#syntax-string-literal).
 
 **Возвращаемые значения**
 
diff --git a/src/Functions/HasSubsequenceImpl.h b/src/Functions/HasSubsequenceImpl.h
index 10fe6215280..ea1826e1e33 100644
--- a/src/Functions/HasSubsequenceImpl.h
+++ b/src/Functions/HasSubsequenceImpl.h
@@ -3,22 +3,27 @@
 #include <Columns/ColumnString.h>
 #include <Functions/GatherUtils/Sources.h>
 #include <Functions/GatherUtils/Sinks.h>
-#include <Functions/GatherUtils/Algorithms.h>
-#include <Functions/GatherUtils/Sinks.h>
+
 namespace DB
 {
+
+namespace ErrorCodes
+{
+    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
+    extern const int ILLEGAL_COLUMN;
+}
 namespace
 {
 
 using namespace GatherUtils;
 
 template <typename Name, typename Impl>
-class FunctionsHasSubsequenceImpl : public IFunction
+class HasSubsequenceImpl : public IFunction
 {
 public:
     static constexpr auto name = Name::name;
 
-    static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionsHasSubsequenceImpl>(); }
+    static FunctionPtr create(ContextPtr) { return std::make_shared<HasSubsequenceImpl>(); }
 
     String getName() const override { return name; }
 
@@ -72,8 +77,9 @@ public:
         else
             throw Exception(
                 ErrorCodes::ILLEGAL_COLUMN,
-                "Illegal column {}, first argument of function {} must be a string",
+                "Illegal columns {} and {} of arguments of function {}",
                 arguments[0].column->getName(),
+                arguments[1].column->getName(),
                 getName());
 
         return col_res;
@@ -87,43 +93,32 @@ private:
         SourceNeedle && needles,
         PaddedPODArray<UInt8> & res_data) const
     {
-        size_t row_num = 0;
-
         while (!haystacks.isEnd())
         {
-            [[maybe_unused]] auto haystack_slice = haystacks.getWhole();
-            [[maybe_unused]] auto needle_slice = needles.getWhole();
+            auto haystack_slice = haystacks.getWhole();
+            auto needle_slice = needles.getWhole();
+            size_t row_num = haystacks.rowNum();
 
-            auto haystack = std::string(reinterpret_cast<const char *>(haystack_slice.data), haystack_slice.size);
-            auto needle = std::string(reinterpret_cast<const char *>(needle_slice.data), needle_slice.size);
-            
             if constexpr (!Impl::is_utf8)
-            {
-                Impl::toLowerIfNeed(haystack);
-                Impl::toLowerIfNeed(needle);
-
-                res_data[row_num] = hasSubsequence(haystack.c_str(), haystack.size(), needle.c_str(), needle.size());
-            }
+                res_data[row_num] = hasSubsequence(haystack_slice.data, haystack_slice.size, needle_slice.data, needle_slice.size);
             else
-            {
-                res_data[row_num] = hasSubsequenceUTF8(haystack.c_str(), haystack.size(), needle.c_str(), needle.size());
-            }
+                res_data[row_num] = hasSubsequenceUTF8(haystack_slice.data, haystack_slice.size, needle_slice.data, needle_slice.size);
+
             haystacks.next();
             needles.next();
-            ++row_num;
         }
     }
 
-    static UInt8 hasSubsequence(const char * haystack, size_t haystack_size, const char * needle, size_t needle_size)
+    static UInt8 hasSubsequence(const UInt8 * haystack, size_t haystack_size, const UInt8 * needle, size_t needle_size)
     {
         size_t j = 0;
         for (size_t i = 0; (i < haystack_size) && (j < needle_size); i++)
-            if (needle[j] == haystack[i])
+            if (Impl::toLowerIfNeed(needle[j]) == Impl::toLowerIfNeed(haystack[i]))
                 ++j;
         return j == needle_size;
     }
 
-    static UInt8 hasSubsequenceUTF8(const char * haystack, size_t haystack_size, const char * needle, size_t needle_size)
+    static UInt8 hasSubsequenceUTF8(const UInt8 * haystack, size_t haystack_size, const UInt8 * needle, size_t needle_size)
     {
         const auto * haystack_pos = haystack;
         const auto * needle_pos = needle;
@@ -131,36 +126,27 @@ private:
         const auto * needle_end = needle + needle_size;
 
         if (!needle_size)
-        {
             return 1;
-        }
 
         auto haystack_code_point = UTF8::convertUTF8ToCodePoint(haystack_pos, haystack_end - haystack_pos);
         auto needle_code_point = UTF8::convertUTF8ToCodePoint(needle_pos, needle_end - needle_pos);
         if (!haystack_code_point || !needle_code_point)
-        {
             return 0;
-        }
-       
-        while (true)
-        {
-            if (needle_code_point == haystack_code_point)
+
+        while (haystack_code_point && needle_code_point)
+        {   
+            if (Impl::toLowerIfNeed(*needle_code_point) == Impl::toLowerIfNeed(*haystack_code_point))
             {
                 needle_pos += UTF8::seqLength(*needle_pos);
-                if (needle_pos == needle_end)
-                {
+                if (needle_pos >= needle_end)
                     break;
-                }
                 needle_code_point = UTF8::convertUTF8ToCodePoint(needle_pos, needle_end - needle_pos);
             }
             haystack_pos += UTF8::seqLength(*haystack_pos);
-            if (haystack_pos == haystack_end)
-            {
+            if (haystack_pos >= haystack_end)
                 break;
-            }
             haystack_code_point = UTF8::convertUTF8ToCodePoint(haystack_pos, haystack_end - haystack_pos);
         }
-                
         return needle_pos == needle_end;
     }
 };
diff --git a/src/Functions/hasSubsequence.cpp b/src/Functions/hasSubsequence.cpp
index acc574c8207..4bcce53b4db 100644
--- a/src/Functions/hasSubsequence.cpp
+++ b/src/Functions/hasSubsequence.cpp
@@ -1,5 +1,4 @@
 #include <Functions/FunctionFactory.h>
-#include <Functions/FunctionsStringSearch.h>
 #include <Functions/HasSubsequenceImpl.h>
 
 
@@ -12,7 +11,7 @@ struct HasSubsequenceCaseSensitiveASCII
 {
     static constexpr bool is_utf8 = false;
 
-    static void toLowerIfNeed(String & /*s*/) { }
+    static int toLowerIfNeed(int c) { return c; }
 };
 
 struct NameHasSubsequence
@@ -20,7 +19,7 @@ struct NameHasSubsequence
     static constexpr auto name = "hasSubsequence";
 };
 
-using FunctionHasSubsequence = FunctionsHasSubsequenceImpl<NameHasSubsequence, HasSubsequenceCaseSensitiveASCII>;
+using FunctionHasSubsequence = HasSubsequenceImpl<NameHasSubsequence, HasSubsequenceCaseSensitiveASCII>;
 }
 
 REGISTER_FUNCTION(hasSubsequence)
diff --git a/src/Functions/hasSubsequenceCaseInsensitive.cpp b/src/Functions/hasSubsequenceCaseInsensitive.cpp
index 68c510794c3..c93bbead58c 100644
--- a/src/Functions/hasSubsequenceCaseInsensitive.cpp
+++ b/src/Functions/hasSubsequenceCaseInsensitive.cpp
@@ -1,5 +1,4 @@
 #include <Functions/FunctionFactory.h>
-#include <Functions/FunctionsStringSearch.h>
 #include <Functions/HasSubsequenceImpl.h>
 
 namespace DB
@@ -11,7 +10,7 @@ struct HasSubsequenceCaseInsensitiveASCII
 {
     static constexpr bool is_utf8 = false;
 
-    static void toLowerIfNeed(String & s) { std::transform(std::begin(s), std::end(s), std::begin(s), tolower); }
+    static int toLowerIfNeed(int c) { return std::tolower(c); }
 };
 
 struct NameHasSubsequenceCaseInsensitive
@@ -19,7 +18,7 @@ struct NameHasSubsequenceCaseInsensitive
     static constexpr auto name = "hasSubsequenceCaseInsensitive";
 };
 
-using FunctionHasSubsequenceCaseInsensitive = FunctionsHasSubsequenceImpl<NameHasSubsequenceCaseInsensitive, HasSubsequenceCaseInsensitiveASCII>;
+using FunctionHasSubsequenceCaseInsensitive = HasSubsequenceImpl<NameHasSubsequenceCaseInsensitive, HasSubsequenceCaseInsensitiveASCII>;
 }
 
 REGISTER_FUNCTION(hasSubsequenceCaseInsensitive)
diff --git a/src/Functions/hasSubsequenceCaseInsensitiveUTF8.cpp b/src/Functions/hasSubsequenceCaseInsensitiveUTF8.cpp
index d1fb2f5152a..18438bc8b16 100644
--- a/src/Functions/hasSubsequenceCaseInsensitiveUTF8.cpp
+++ b/src/Functions/hasSubsequenceCaseInsensitiveUTF8.cpp
@@ -1,7 +1,8 @@
 #include <Functions/FunctionFactory.h>
-#include <Functions/FunctionsStringSearch.h>
 #include <Functions/HasSubsequenceImpl.h>
 
+#include "Poco/Unicode.h"
+
 namespace DB
 {
 namespace
@@ -11,7 +12,7 @@ struct HasSubsequenceCaseInsensitiveUTF8
 {
     static constexpr bool is_utf8 = true;
 
-    //static void toLowerIfNeed(String & s) { std::transform(std::begin(s), std::end(s), std::begin(s), tolower); }
+    static int toLowerIfNeed(int code_point) { return Poco::Unicode::toLower(code_point); }
 };
 
 struct NameHasSubsequenceCaseInsensitiveUTF8
@@ -19,7 +20,7 @@ struct NameHasSubsequenceCaseInsensitiveUTF8
     static constexpr auto name = "hasSubsequenceCaseInsensitiveUTF8";
 };
 
-using FunctionHasSubsequenceCaseInsensitiveUTF8 = FunctionsHasSubsequenceImpl<NameHasSubsequenceCaseInsensitiveUTF8, HasSubsequenceCaseInsensitiveUTF8>;
+using FunctionHasSubsequenceCaseInsensitiveUTF8 = HasSubsequenceImpl<NameHasSubsequenceCaseInsensitiveUTF8, HasSubsequenceCaseInsensitiveUTF8>;
 }
 
 REGISTER_FUNCTION(hasSubsequenceCaseInsensitiveUTF8)
diff --git a/src/Functions/hasSubsequenceUTF8.cpp b/src/Functions/hasSubsequenceUTF8.cpp
index fcdcd28e02b..7a22211eb8c 100644
--- a/src/Functions/hasSubsequenceUTF8.cpp
+++ b/src/Functions/hasSubsequenceUTF8.cpp
@@ -1,5 +1,4 @@
 #include <Functions/FunctionFactory.h>
-#include <Functions/FunctionsStringSearch.h>
 #include <Functions/HasSubsequenceImpl.h>
 
 
@@ -12,7 +11,7 @@ struct HasSubsequenceCaseSensitiveUTF8
 {
     static constexpr bool is_utf8 = true;
 
-   // static void toLowerIfNeed(String & /*s*/) { }
+    static int toLowerIfNeed(int code_point) { return code_point; }
 };
 
 struct NameHasSubsequenceUTF8
@@ -20,7 +19,7 @@ struct NameHasSubsequenceUTF8
     static constexpr auto name = "hasSubsequenceUTF8";
 };
 
-using FunctionHasSubsequenceUTF8 = FunctionsHasSubsequenceImpl<NameHasSubsequenceUTF8, HasSubsequenceCaseSensitiveUTF8>;
+using FunctionHasSubsequenceUTF8 = HasSubsequenceImpl<NameHasSubsequenceUTF8, HasSubsequenceCaseSensitiveUTF8>;
 }
 
 REGISTER_FUNCTION(hasSubsequenceUTF8)
diff --git a/tests/queries/0_stateless/02809_has_subsequence.reference b/tests/queries/0_stateless/02809_has_subsequence.reference
index 8437a7f4b74..0bf8e4e3a36 100644
--- a/tests/queries/0_stateless/02809_has_subsequence.reference
+++ b/tests/queries/0_stateless/02809_has_subsequence.reference
@@ -1,4 +1,4 @@
-hasSubsequence / const / const
+hasSubsequence
 1
 1
 0
@@ -12,27 +12,23 @@ hasSubsequence / const / const
 0
 0
 0
-hasSubsequence / const / string
 1
 1
 0
-hasSubsequence / string / const
 1
 1
 0
-hasSubsequence / string / string
 1
 1
 0
-hasSubsequenceCaseInsensitive / const / const
+hasSubsequenceCaseInsensitive
 0
 1
 1
-hasSubsequenceCaseInsensitive / string / string
 0
 1
 1
-hasSubsequenceUTF8 / const / const
+hasSubsequenceUTF8
 1
 1
 0
@@ -47,3 +43,13 @@ hasSubsequenceUTF8 / const / const
 0
 1
 0
+hasSubsequenceCaseInsensitiveUTF8
+0
+1
+1
+1
+0
+1
+0
+1
+0
diff --git a/tests/queries/0_stateless/02809_has_subsequence.sql b/tests/queries/0_stateless/02809_has_subsequence.sql
index b8d3280488c..6715d901309 100644
--- a/tests/queries/0_stateless/02809_has_subsequence.sql
+++ b/tests/queries/0_stateless/02809_has_subsequence.sql
@@ -1,4 +1,4 @@
-select 'hasSubsequence / const / const';
+select 'hasSubsequence';
 select hasSubsequence('garbage', '');
 select hasSubsequence('garbage', 'g');
 select hasSubsequence('garbage', 'G');
@@ -9,39 +9,28 @@ select hasSubsequence('garbage', 'ab');
 select hasSubsequence('garbage', 'be');
 select hasSubsequence('garbage', 'arg');
 select hasSubsequence('garbage', 'garbage');
-
 select hasSubsequence('garbage', 'garbage1');
 select hasSubsequence('garbage', 'arbw');
 select hasSubsequence('garbage', 'ARG');
-
-select 'hasSubsequence / const / string';
 select hasSubsequence('garbage', materialize(''));
 select hasSubsequence('garbage', materialize('arg'));
 select hasSubsequence('garbage', materialize('arbw'));
-
-select 'hasSubsequence / string / const';
 select hasSubsequence(materialize('garbage'), '');
 select hasSubsequence(materialize('garbage'), 'arg');
 select hasSubsequence(materialize('garbage'), 'arbw');
-
-select 'hasSubsequence / string / string';
-
 select hasSubsequence(materialize('garbage'), materialize(''));
 select hasSubsequence(materialize('garbage'), materialize('arg'));
 select hasSubsequence(materialize('garbage'), materialize('garbage1'));
 
-select 'hasSubsequenceCaseInsensitive / const / const';
-
+select 'hasSubsequenceCaseInsensitive';
 select hasSubsequenceCaseInsensitive('garbage', 'w');
 select hasSubsequenceCaseInsensitive('garbage', 'ARG');
 select hasSubsequenceCaseInsensitive('GARGAGE', 'arg');
-
-select 'hasSubsequenceCaseInsensitive / string / string';
 select hasSubsequenceCaseInsensitive(materialize('garbage'), materialize('w'));
 select hasSubsequenceCaseInsensitive(materialize('garbage'), materialize('ARG'));
 select hasSubsequenceCaseInsensitive(materialize('GARGAGE'), materialize('arg'));
 
-select 'hasSubsequenceUTF8 / const / const';
+select 'hasSubsequenceUTF8';
 select hasSubsequence('ClickHouse - столбцовая система управления базами данных', '');
 select hasSubsequence('ClickHouse - столбцовая система управления базами данных', 'C');     -- eng
 select hasSubsequence('ClickHouse - столбцовая система управления базами данных', 'С');     -- cyrilic
@@ -50,10 +39,20 @@ select hasSubsequence('ClickHouse - столбцовая система упра
 select hasSubsequence('ClickHouse - столбцовая система управления базами данных', 'система');
 select hasSubsequence('ClickHouse - столбцовая система управления базами данных', 'Система');
 select hasSubsequence('ClickHouse - столбцовая система управления базами данных', 'ссубд');
-
 select hasSubsequence(materialize('ClickHouse - столбцовая система управления базами данных'), 'субд');
 select hasSubsequence(materialize('ClickHouse - столбцовая система управления базами данных'), 'суббд');
 select hasSubsequence('ClickHouse - столбцовая система управления базами данных', materialize('стул'));
 select hasSubsequence('ClickHouse - столбцовая система управления базами данных', materialize('два стула'));
 select hasSubsequence(materialize('ClickHouse - столбцовая система управления базами данных'), materialize('орех'));
-select hasSubsequence(materialize('ClickHouse - столбцовая система управления базами данных'), materialize('два ореха'));
\ No newline at end of file
+select hasSubsequence(materialize('ClickHouse - столбцовая система управления базами данных'), materialize('два ореха'));
+
+select 'hasSubsequenceCaseInsensitiveUTF8';
+select hasSubsequenceCaseInsensitiveUTF8('для онлайн обработки аналитических запросов (OLAP)', 'oltp');
+select hasSubsequenceCaseInsensitiveUTF8('для онлайн обработки аналитических запросов (OLAP)', 'оОоОоO');
+select hasSubsequenceCaseInsensitiveUTF8('для онлайн обработки аналитических запросов (OLAP)', 'я раб');
+select hasSubsequenceCaseInsensitiveUTF8(materialize('для онлайн обработки аналитических запросов (OLAP)'), 'работа');
+select hasSubsequenceCaseInsensitiveUTF8(materialize('для онлайн обработки аналитических запросов (OLAP)'), 'work');
+select hasSubsequenceCaseInsensitiveUTF8('для онлайн обработки аналитических запросов (OLAP)', materialize('добро)'));
+select hasSubsequenceCaseInsensitiveUTF8('для онлайн обработки аналитических запросов (OLAP)', materialize('зло()'));
+select hasSubsequenceCaseInsensitiveUTF8(materialize('для онлайн обработки аналитических запросов (OLAP)'), materialize('аналитика'));
+select hasSubsequenceCaseInsensitiveUTF8(materialize('для онлайн обработки аналитических запросов (OLAP)'), materialize('аналитика для аналитиков'));
\ No newline at end of file
diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt
index 2802e52c288..270e486586e 100644
--- a/utils/check-style/aspell-ignore/en/aspell-dict.txt
+++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt
@@ -1534,6 +1534,10 @@ hadoop
 halfMD
 halfday
 hardlinks
+hasSubsequence
+hasSubsequenceCaseInsensitive
+hasSubsequenceCaseInsensitiveUTF
+hasSubsequenceUTF
 hasAll
 hasAny
 hasColumnInTable
@@ -2238,6 +2242,7 @@ subquery
 subranges
 subreddits
 subseconds
+subsequence
 substring
 substringUTF
 substrings

From 4cd12a505333e55c7773163e0c6ea5b296600175 Mon Sep 17 00:00:00 2001
From: Dmitry Kardymon <d.kardymon@arenadata.io>
Date: Mon, 10 Jul 2023 09:33:53 +0000
Subject: [PATCH 062/242] Remove trailing whitespace

---
 src/Functions/HasSubsequenceImpl.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Functions/HasSubsequenceImpl.h b/src/Functions/HasSubsequenceImpl.h
index ea1826e1e33..fda29820298 100644
--- a/src/Functions/HasSubsequenceImpl.h
+++ b/src/Functions/HasSubsequenceImpl.h
@@ -134,7 +134,7 @@ private:
             return 0;
 
         while (haystack_code_point && needle_code_point)
-        {   
+        {
             if (Impl::toLowerIfNeed(*needle_code_point) == Impl::toLowerIfNeed(*haystack_code_point))
             {
                 needle_pos += UTF8::seqLength(*needle_pos);

From 33405e70f2867e72e498abaeaa1bc8b7317e0284 Mon Sep 17 00:00:00 2001
From: Dmitry Kardymon <d.kardymon@arenadata.io>
Date: Mon, 10 Jul 2023 10:01:26 +0000
Subject: [PATCH 063/242] Add missing includes to HasSubsequenceImpl.h to fix build

---
 src/Functions/HasSubsequenceImpl.h | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/Functions/HasSubsequenceImpl.h b/src/Functions/HasSubsequenceImpl.h
index fda29820298..afbf53d45f9 100644
--- a/src/Functions/HasSubsequenceImpl.h
+++ b/src/Functions/HasSubsequenceImpl.h
@@ -1,6 +1,9 @@
 #pragma once
 
 #include <Columns/ColumnString.h>
+#include <Columns/ColumnConst.h>
+#include <Columns/ColumnString.h>
+#include <DataTypes/DataTypesNumber.h>
 #include <Functions/GatherUtils/Sources.h>
 #include <Functions/GatherUtils/Sinks.h>
 

From 66e759ec9651cb04e66dc8aeb6f561a0f97812ab Mon Sep 17 00:00:00 2001
From: Dmitry Kardymon <d.kardymon@arenadata.io>
Date: Mon, 10 Jul 2023 10:18:00 +0000
Subject: [PATCH 064/242] Remove duplicate ColumnString.h include from HasSubsequenceImpl.h

---
 src/Functions/HasSubsequenceImpl.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/Functions/HasSubsequenceImpl.h b/src/Functions/HasSubsequenceImpl.h
index afbf53d45f9..17955746aa2 100644
--- a/src/Functions/HasSubsequenceImpl.h
+++ b/src/Functions/HasSubsequenceImpl.h
@@ -2,7 +2,6 @@
 
 #include <Columns/ColumnString.h>
 #include <Columns/ColumnConst.h>
-#include <Columns/ColumnString.h>
 #include <DataTypes/DataTypesNumber.h>
 #include <Functions/GatherUtils/Sources.h>
 #include <Functions/GatherUtils/Sinks.h>

From 2c3ba033799c409cd6f7e65057a85e6c09605670 Mon Sep 17 00:00:00 2001
From: Dmitry Kardymon <d.kardymon@arenadata.io>
Date: Mon, 10 Jul 2023 12:11:34 +0000
Subject: [PATCH 065/242] Fix 02415_all_new_functions_must_be_documented

---
 .../02415_all_new_functions_must_be_documented.reference      | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference
index b5c133988e6..d241e2f0d28 100644
--- a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference
+++ b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference
@@ -343,6 +343,10 @@ has
 hasAll
 hasAny
 hasColumnInTable
+hasSubsequence
+hasSubsequenceCaseInsensitive
+hasSubsequenceCaseInsensitiveUTF8
+hasSubsequenceUTF8
 hasSubstr
 hasThreadFuzzer
 hashid

From c9a754dc4b916bbde1d5d11e84fcafac4ed787cb Mon Sep 17 00:00:00 2001
From: Dmitry Kardymon <d.kardymon@arenadata.io>
Date: Wed, 12 Jul 2023 08:47:37 +0300
Subject: [PATCH 066/242] Add more tests

---
 tests/queries/0_stateless/02809_has_subsequence.reference | 2 ++
 tests/queries/0_stateless/02809_has_subsequence.sql       | 4 +++-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/tests/queries/0_stateless/02809_has_subsequence.reference b/tests/queries/0_stateless/02809_has_subsequence.reference
index 0bf8e4e3a36..5f533d7f5bb 100644
--- a/tests/queries/0_stateless/02809_has_subsequence.reference
+++ b/tests/queries/0_stateless/02809_has_subsequence.reference
@@ -10,6 +10,8 @@ hasSubsequence
 1
 1
 0
+1
+0
 0
 0
 1
diff --git a/tests/queries/0_stateless/02809_has_subsequence.sql b/tests/queries/0_stateless/02809_has_subsequence.sql
index 6715d901309..dea05369a0e 100644
--- a/tests/queries/0_stateless/02809_has_subsequence.sql
+++ b/tests/queries/0_stateless/02809_has_subsequence.sql
@@ -8,6 +8,8 @@ select hasSubsequence('garbage', 'gr');
 select hasSubsequence('garbage', 'ab');
 select hasSubsequence('garbage', 'be');
 select hasSubsequence('garbage', 'arg');
+select hasSubsequence('garbage', 'gra');
+select hasSubsequence('garbage', 'rga');s
 select hasSubsequence('garbage', 'garbage');
 select hasSubsequence('garbage', 'garbage1');
 select hasSubsequence('garbage', 'arbw');
@@ -55,4 +57,4 @@ select hasSubsequenceCaseInsensitiveUTF8(materialize('для онлайн обр
 select hasSubsequenceCaseInsensitiveUTF8('для онлайн обработки аналитических запросов (OLAP)', materialize('добро)'));
 select hasSubsequenceCaseInsensitiveUTF8('для онлайн обработки аналитических запросов (OLAP)', materialize('зло()'));
 select hasSubsequenceCaseInsensitiveUTF8(materialize('для онлайн обработки аналитических запросов (OLAP)'), materialize('аналитика'));
-select hasSubsequenceCaseInsensitiveUTF8(materialize('для онлайн обработки аналитических запросов (OLAP)'), materialize('аналитика для аналитиков'));
\ No newline at end of file
+select hasSubsequenceCaseInsensitiveUTF8(materialize('для онлайн обработки аналитических запросов (OLAP)'), materialize('аналитика для аналитиков'));

From 2eef451f77616ada1974568ef658724636fe382b Mon Sep 17 00:00:00 2001
From: Dmitry Kardymon <d.kardymon@arenadata.io>
Date: Wed, 12 Jul 2023 08:48:58 +0300
Subject: [PATCH 067/242] Remove stray character from 02809_has_subsequence.sql

---
 tests/queries/0_stateless/02809_has_subsequence.sql | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/queries/0_stateless/02809_has_subsequence.sql b/tests/queries/0_stateless/02809_has_subsequence.sql
index dea05369a0e..bcc491a95fe 100644
--- a/tests/queries/0_stateless/02809_has_subsequence.sql
+++ b/tests/queries/0_stateless/02809_has_subsequence.sql
@@ -9,7 +9,7 @@ select hasSubsequence('garbage', 'ab');
 select hasSubsequence('garbage', 'be');
 select hasSubsequence('garbage', 'arg');
 select hasSubsequence('garbage', 'gra');
-select hasSubsequence('garbage', 'rga');s
+select hasSubsequence('garbage', 'rga');
 select hasSubsequence('garbage', 'garbage');
 select hasSubsequence('garbage', 'garbage1');
 select hasSubsequence('garbage', 'arbw');

From 771b1f8f475993b331749af9d7edff5fc2e5866c Mon Sep 17 00:00:00 2001
From: Dmitry Kardymon <d.kardymon@arenadata.io>
Date: Wed, 12 Jul 2023 15:48:09 +0000
Subject: [PATCH 068/242] Prototype (working)

---
 programs/server/Server.cpp      |  8 +----
 src/Common/SystemLogBase.cpp    | 54 ++++++++++++++++++++---------
 src/Common/SystemLogBase.h      | 61 ++++++++++++++++++++++++---------
 src/Interpreters/SystemLog.cpp  | 26 +++++++-------
 src/Interpreters/SystemLog.h    | 17 ++++-----
 src/Interpreters/TextLog.cpp    |  2 +-
 src/Interpreters/TextLog.h      |  8 +++++
 src/Loggers/Loggers.cpp         | 18 ++++------
 src/Loggers/Loggers.h           | 13 -------
 src/Loggers/OwnSplitChannel.cpp |  6 ++--
 src/Loggers/OwnSplitChannel.h   |  8 +++--
 11 files changed, 129 insertions(+), 92 deletions(-)

diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp
index c7a7ba71e83..79a28558d93 100644
--- a/programs/server/Server.cpp
+++ b/programs/server/Server.cpp
@@ -1599,13 +1599,7 @@ try
         global_context->setSystemZooKeeperLogAfterInitializationIfNeeded();
         /// Build loggers before tables startup to make log messages from tables
         /// attach available in system.text_log
-        {
-            String level_str = config().getString("text_log.level", "");
-            int level = level_str.empty() ? INT_MAX : Poco::Logger::parseLevel(level_str);
-            setTextLog(global_context->getTextLog(), level);
-
-            buildLoggers(config(), logger());
-        }
+        buildLoggers(config(), logger());
         /// After the system database is created, attach virtual system tables (in addition to query_log and part_log)
         attachSystemTablesServer(global_context, *database_catalog.getSystemDatabase(), has_zookeeper);
         attachInformationSchema(global_context, *database_catalog.getDatabase(DatabaseCatalog::INFORMATION_SCHEMA));
diff --git a/src/Common/SystemLogBase.cpp b/src/Common/SystemLogBase.cpp
index 86adcbbd31b..28dc5ec2666 100644
--- a/src/Common/SystemLogBase.cpp
+++ b/src/Common/SystemLogBase.cpp
@@ -38,10 +38,20 @@ namespace
 
 ISystemLog::~ISystemLog() = default;
 
-void ISystemLog::stopFlushThread()
+template <typename LogElement>
+SystemLogBase<LogElement>::SystemLogBase(std::shared_ptr<SystemLogQueue<LogElement>> ex_queue)
+{
+    if (ex_queue)
+        queue = ex_queue;
+    else
+        queue = std::make_shared<SystemLogQueue<LogElement>>();
+}
+
+template <typename LogElement>
+void SystemLogBase<LogElement>::stopFlushThread()
 {
     {
-        std::lock_guard lock(mutex);
+        std::lock_guard lock(queue->mutex);
 
         if (!saving_thread || !saving_thread->joinable())
             return;
@@ -52,22 +62,26 @@ void ISystemLog::stopFlushThread()
         is_shutdown = true;
 
         /// Tell thread to shutdown.
-        flush_event.notify_all();
+        queue->flush_event.notify_all();
     }
 
     saving_thread->join();
 }
 
-void ISystemLog::startup()
+template <typename LogElement>
+void SystemLogBase<LogElement>::startup()
 {
-    std::lock_guard lock(mutex);
-    saving_thread = std::make_unique<ThreadFromGlobalPool>([this] { savingThreadFunction(); });
+    std::lock_guard lock(queue->mutex);
+    std::cout << "void ISystemLog::startup()" << std::endl;
+    saving_thread = std::make_unique<ThreadFromGlobalPool>([this] { 
+        std::cout << "void ISystemLog::ThreadFromGlobalPool()" << std::endl;
+        savingThreadFunction(); });
 }
 
 static thread_local bool recursive_add_call = false;
 
 template <typename LogElement>
-void SystemLogBase<LogElement>::add(const LogElement & element)
+void SystemLogQueue<LogElement>::add(const LogElement & element)
 {
     /// It is possible that the method will be called recursively.
     /// Better to drop these events to avoid complications.
@@ -75,21 +89,21 @@ void SystemLogBase<LogElement>::add(const LogElement & element)
         return;
     recursive_add_call = true;
     SCOPE_EXIT({ recursive_add_call = false; });
-
-    /// Memory can be allocated while resizing on queue.push_back.
+     /// Memory can be allocated while resizing on queue.push_back.
     /// The size of allocation can be in order of a few megabytes.
     /// But this should not be accounted for query memory usage.
     /// Otherwise the tests like 01017_uniqCombined_memory_usage.sql will be flacky.
     MemoryTrackerBlockerInThread temporarily_disable_memory_tracker;
 
+
     /// Should not log messages under mutex.
     bool queue_is_half_full = false;
 
     {
         std::unique_lock lock(mutex);
 
-        if (is_shutdown)
-            return;
+        // if (queue.is_shutdown)
+        //     return;              // TODO
 
         if (queue.size() == DBMS_SYSTEM_LOG_QUEUE_SIZE / 2)
         {
@@ -134,25 +148,31 @@ void SystemLogBase<LogElement>::add(const LogElement & element)
         LOG_INFO(log, "Queue is half full for system log '{}'.", demangle(typeid(*this).name()));
 }
 
+template <typename LogElement>
+void SystemLogBase<LogElement>::add(const LogElement & element)
+{
+    queue->add(element);
+}
+
 template <typename LogElement>
 void SystemLogBase<LogElement>::flush(bool force)
 {
     uint64_t this_thread_requested_offset;
 
     {
-        std::lock_guard lock(mutex);
+        std::lock_guard lock(queue->mutex);
 
         if (is_shutdown)
             return;
 
-        this_thread_requested_offset = queue_front_index + queue.size();
+        this_thread_requested_offset = queue->queue_front_index + queue->size();
 
         // Publish our flush request, taking care not to overwrite the requests
         // made by other threads.
         is_force_prepare_tables |= force;
-        requested_flush_up_to = std::max(requested_flush_up_to, this_thread_requested_offset);
+        queue->requested_flush_up_to = std::max(queue->requested_flush_up_to, this_thread_requested_offset);
 
-        flush_event.notify_all();
+        queue->flush_event.notify_all();
     }
 
     LOG_DEBUG(log, "Requested flush up to offset {}", this_thread_requested_offset);
@@ -161,8 +181,8 @@ void SystemLogBase<LogElement>::flush(bool force)
     // too fast for our parallel functional tests, probably because they
     // heavily load the disk.
     const int timeout_seconds = 180;
-    std::unique_lock lock(mutex);
-    bool result = flush_event.wait_for(lock, std::chrono::seconds(timeout_seconds), [&]
+    std::unique_lock lock(queue->mutex);
+    bool result = queue->flush_event.wait_for(lock, std::chrono::seconds(timeout_seconds), [&]
     {
         return flushed_up_to >= this_thread_requested_offset && !is_force_prepare_tables;
     });
diff --git a/src/Common/SystemLogBase.h b/src/Common/SystemLogBase.h
index f8febd8b159..cb6003c94ef 100644
--- a/src/Common/SystemLogBase.h
+++ b/src/Common/SystemLogBase.h
@@ -55,25 +55,52 @@ public:
     virtual void prepareTable() = 0;
 
     /// Start the background thread.
-    virtual void startup();
+    virtual void startup() = 0;
 
     /// Stop the background flush thread before destructor. No more data will be written.
     virtual void shutdown() = 0;
 
+    virtual void stopFlushThread() = 0;
+
     virtual ~ISystemLog();
 
     virtual void savingThreadFunction() = 0;
 
 protected:
-    std::unique_ptr<ThreadFromGlobalPool> saving_thread;
+    std::unique_ptr<ThreadFromGlobalPool> saving_thread;    
+};
+
+template <typename LogElement>
+class SystemLogQueue
+{
+public:
+
+    void add(const LogElement & element);
+    size_t size() const { return queue.size(); }
+    void push_back(const LogElement & element)
+    {
+        queue.push_back(element);
+    }
+
+    // Queue is bounded. But its size is quite large to not block in all normal cases.
+    std::vector<LogElement> queue;
+    // An always-incrementing index of the first message currently in the queue.
+    // We use it to give a global sequential index to every message, so that we
+    // can wait until a particular message is flushed. This is used to implement
+    // synchronous log flushing for SYSTEM FLUSH LOGS.
+    uint64_t queue_front_index = 0;
 
     /// Data shared between callers of add()/flush()/shutdown(), and the saving thread
     std::mutex mutex;
-
-    bool is_shutdown = false;
     std::condition_variable flush_event;
 
-    void stopFlushThread();
+    // Requested to flush logs up to this index, exclusive
+    uint64_t requested_flush_up_to = 0;
+
+    // Logged overflow message at this queue front index
+    uint64_t logged_queue_full_at_index = -1;
+
+    Poco::Logger * log;
 };
 
 template <typename LogElement>
@@ -82,6 +109,8 @@ class SystemLogBase : public ISystemLog
 public:
     using Self = SystemLogBase;
 
+    SystemLogBase(std::shared_ptr<SystemLogQueue<LogElement>> ex_queue = nullptr);
+
     /** Append a record into log.
       * Writing to table will be done asynchronously and in case of failure, record could be lost.
       */
@@ -90,6 +119,10 @@ public:
     /// Flush data in the buffer to disk
     void flush(bool force) override;
 
+    void startup() override;
+
+     void stopFlushThread() override;
+
     String getName() const override { return LogElement::name(); }
 
     static const char * getDefaultOrderBy() { return "event_date, event_time"; }
@@ -97,21 +130,17 @@ public:
 protected:
     Poco::Logger * log;
 
-    // Queue is bounded. But its size is quite large to not block in all normal cases.
-    std::vector<LogElement> queue;
-    // An always-incrementing index of the first message currently in the queue.
-    // We use it to give a global sequential index to every message, so that we
-    // can wait until a particular message is flushed. This is used to implement
-    // synchronous log flushing for SYSTEM FLUSH LOGS.
-    uint64_t queue_front_index = 0;
+    std::shared_ptr<SystemLogQueue<LogElement>> queue;
+
     // A flag that says we must create the tables even if the queue is empty.
     bool is_force_prepare_tables = false;
-    // Requested to flush logs up to this index, exclusive
-    uint64_t requested_flush_up_to = 0;
+
     // Flushed log up to this index, exclusive
     uint64_t flushed_up_to = 0;
-    // Logged overflow message at this queue front index
-    uint64_t logged_queue_full_at_index = -1;
+
+
+    bool is_shutdown = false;
+
 };
 
 }
diff --git a/src/Interpreters/SystemLog.cpp b/src/Interpreters/SystemLog.cpp
index 3fd0297f5b8..cbf355d020a 100644
--- a/src/Interpreters/SystemLog.cpp
+++ b/src/Interpreters/SystemLog.cpp
@@ -332,8 +332,10 @@ SystemLog<LogElement>::SystemLog(
     const String & database_name_,
     const String & table_name_,
     const String & storage_def_,
-    size_t flush_interval_milliseconds_)
-    : WithContext(context_)
+    size_t flush_interval_milliseconds_,
+    std::shared_ptr<SystemLogQueue<LogElement>> ex_queue)
+    : Base(ex_queue)
+    , WithContext(context_)
     , table_id(database_name_, table_name_)
     , storage_def(storage_def_)
     , create_query(serializeAST(*getCreateTableQuery()))
@@ -371,21 +373,21 @@ void SystemLog<LogElement>::savingThreadFunction()
             bool should_prepare_tables_anyway = false;
 
             {
-                std::unique_lock lock(mutex);
-                flush_event.wait_for(lock,
+                std::unique_lock lock(queue->mutex);
+                queue->flush_event.wait_for(lock,
                     std::chrono::milliseconds(flush_interval_milliseconds),
                     [&] ()
                     {
-                        return requested_flush_up_to > flushed_up_to || is_shutdown || is_force_prepare_tables;
+                        return queue->requested_flush_up_to > flushed_up_to || is_shutdown || is_force_prepare_tables;
                     }
                 );
 
-                queue_front_index += queue.size();
-                to_flush_end = queue_front_index;
+                queue->queue_front_index += queue->size();
+                to_flush_end = queue->queue_front_index;
                 // Swap with existing array from previous flush, to save memory
                 // allocations.
                 to_flush.resize(0);
-                queue.swap(to_flush);
+                queue->queue.swap(to_flush);
 
                 should_prepare_tables_anyway = is_force_prepare_tables;
 
@@ -399,9 +401,9 @@ void SystemLog<LogElement>::savingThreadFunction()
                     prepareTable();
                     LOG_TRACE(log, "Table created (force)");
 
-                    std::lock_guard lock(mutex);
+                    std::lock_guard lock(queue->mutex);
                     is_force_prepare_tables = false;
-                    flush_event.notify_all();
+                    queue->flush_event.notify_all();
                 }
             }
             else
@@ -474,10 +476,10 @@ void SystemLog<LogElement>::flushImpl(const std::vector<LogElement> & to_flush,
     }
 
     {
-        std::lock_guard lock(mutex);
+        std::lock_guard lock(queue->mutex);
         flushed_up_to = to_flush_end;
         is_force_prepare_tables = false;
-        flush_event.notify_all();
+        queue->flush_event.notify_all();
     }
 
     LOG_TRACE(log, "Flushed system log up to offset {}", to_flush_end);
diff --git a/src/Interpreters/SystemLog.h b/src/Interpreters/SystemLog.h
index 84b70c67e2a..bf41ff12d2a 100644
--- a/src/Interpreters/SystemLog.h
+++ b/src/Interpreters/SystemLog.h
@@ -108,22 +108,23 @@ public:
         const String & database_name_,
         const String & table_name_,
         const String & storage_def_,
-        size_t flush_interval_milliseconds_);
+        size_t flush_interval_milliseconds_,
+        std::shared_ptr<SystemLogQueue<LogElement>> ex_queue = nullptr);
 
     void shutdown() override;
 
 protected:
-    using ISystemLog::mutex;
-    using ISystemLog::is_shutdown;
-    using ISystemLog::flush_event;
-    using ISystemLog::stopFlushThread;
+    //using ISystemLog::mutex;
+    using Base::is_shutdown;
+   // using ISystemLog::flush_event;
+    using Base::stopFlushThread;
     using Base::log;
     using Base::queue;
-    using Base::queue_front_index;
+   // using Base::queue_front_index;
     using Base::is_force_prepare_tables;
-    using Base::requested_flush_up_to;
+    //using Base::requested_flush_up_to;
     using Base::flushed_up_to;
-    using Base::logged_queue_full_at_index;
+ //   using Base::logged_queue_full_at_index;
 
 private:
 
diff --git a/src/Interpreters/TextLog.cpp b/src/Interpreters/TextLog.cpp
index 45d5a7b2344..a66092c1c2b 100644
--- a/src/Interpreters/TextLog.cpp
+++ b/src/Interpreters/TextLog.cpp
@@ -84,7 +84,7 @@ TextLog::TextLog(ContextPtr context_, const String & database_name_,
         const String & table_name_, const String & storage_def_,
         size_t flush_interval_milliseconds_)
   : SystemLog<TextLogElement>(context_, database_name_, table_name_,
-        storage_def_, flush_interval_milliseconds_)
+        storage_def_, flush_interval_milliseconds_, getLogQueue())
 {
     // SystemLog methods may write text logs, so we disable logging for the text
     // log table to avoid recursion.
diff --git a/src/Interpreters/TextLog.h b/src/Interpreters/TextLog.h
index 6efc1c906d4..9c761f9d865 100644
--- a/src/Interpreters/TextLog.h
+++ b/src/Interpreters/TextLog.h
@@ -46,6 +46,14 @@ public:
         const String & table_name_,
         const String & storage_def_,
         size_t flush_interval_milliseconds_);
+    
+    static std::shared_ptr<SystemLogQueue<TextLogElement>> getLogQueue()
+    {
+        static std::shared_ptr<SystemLogQueue<TextLogElement>> queue = std::make_shared<SystemLogQueue<TextLogElement>>();
+        return queue;
+    }
 };
 
+
+
 }
diff --git a/src/Loggers/Loggers.cpp b/src/Loggers/Loggers.cpp
index 0c3a7bd615d..96793ee5e0a 100644
--- a/src/Loggers/Loggers.cpp
+++ b/src/Loggers/Loggers.cpp
@@ -34,21 +34,9 @@ static std::string createDirectory(const std::string & file)
     return path;
 }
 
-#ifndef WITHOUT_TEXT_LOG
-void Loggers::setTextLog(std::shared_ptr<DB::TextLog> log, int max_priority)
-{
-    text_log = log;
-    text_log_max_priority = max_priority;
-}
-#endif
 
 void Loggers::buildLoggers(Poco::Util::AbstractConfiguration & config, Poco::Logger & logger /*_root*/, const std::string & cmd_name)
 {
-#ifndef WITHOUT_TEXT_LOG
-    if (split)
-        if (auto log = text_log.lock())
-            split->addTextLog(log, text_log_max_priority);
-#endif
 
     auto current_logger = config.getString("logger", "");
     if (config_logger.has_value() && *config_logger == current_logger)
@@ -62,6 +50,12 @@ void Loggers::buildLoggers(Poco::Util::AbstractConfiguration & config, Poco::Log
     /// Use extended interface of Channel for more comprehensive logging.
     split = new DB::OwnSplitChannel();
 
+#ifndef WITHOUT_TEXT_LOG
+    String text_log_level_str = config.getString("text_log.level", "");
+    int text_log_level = text_log_level_str.empty() ? INT_MAX : Poco::Logger::parseLevel(text_log_level_str);
+    split->addTextLog(DB::TextLog::getLogQueue(), text_log_level);
+#endif
+
     auto log_level_string = config.getString("logger.level", "trace");
 
     /// different channels (log, console, syslog) may have different loglevels configured
diff --git a/src/Loggers/Loggers.h b/src/Loggers/Loggers.h
index ebc10954b94..9eff731a4c5 100644
--- a/src/Loggers/Loggers.h
+++ b/src/Loggers/Loggers.h
@@ -7,12 +7,6 @@
 #include <Poco/Util/Application.h>
 #include "OwnSplitChannel.h"
 
-#ifndef WITHOUT_TEXT_LOG
-namespace DB
-{
-    class TextLog;
-}
-#endif
 
 namespace Poco::Util
 {
@@ -29,9 +23,6 @@ public:
     /// Close log files. On next log write files will be reopened.
     void closeLogs(Poco::Logger & logger);
 
-#ifndef WITHOUT_TEXT_LOG
-    void setTextLog(std::shared_ptr<DB::TextLog> log, int max_priority);
-#endif
 
 private:
     Poco::AutoPtr<Poco::FileChannel> log_file;
@@ -41,10 +32,6 @@ private:
     /// Previous value of logger element in config. It is used to reinitialize loggers whenever the value changed.
     std::optional<std::string> config_logger;
 
-#ifndef WITHOUT_TEXT_LOG
-    std::weak_ptr<DB::TextLog> text_log;
-    int text_log_max_priority = -1;
-#endif
 
     Poco::AutoPtr<DB::OwnSplitChannel> split;
 };
diff --git a/src/Loggers/OwnSplitChannel.cpp b/src/Loggers/OwnSplitChannel.cpp
index 03db198c305..7661996ea33 100644
--- a/src/Loggers/OwnSplitChannel.cpp
+++ b/src/Loggers/OwnSplitChannel.cpp
@@ -135,7 +135,7 @@ void OwnSplitChannel::logSplit(const Poco::Message & msg)
         elem.source_line = msg.getSourceLine();
         elem.message_format_string = msg.getFormatString();
 
-        std::shared_ptr<TextLog> text_log_locked{};
+        std::shared_ptr<SystemLogQueue<TextLogElement>> text_log_locked{};
         {
             std::lock_guard lock(text_log_mutex);
             text_log_locked = text_log.lock();
@@ -153,10 +153,10 @@ void OwnSplitChannel::addChannel(Poco::AutoPtr<Poco::Channel> channel, const std
 }
 
 #ifndef WITHOUT_TEXT_LOG
-void OwnSplitChannel::addTextLog(std::shared_ptr<DB::TextLog> log, int max_priority)
+void OwnSplitChannel::addTextLog(std::shared_ptr<SystemLogQueue<TextLogElement>> log_queue, int max_priority)
 {
     std::lock_guard lock(text_log_mutex);
-    text_log = log;
+    text_log = log_queue;
     text_log_max_priority.store(max_priority, std::memory_order_relaxed);
 }
 #endif
diff --git a/src/Loggers/OwnSplitChannel.h b/src/Loggers/OwnSplitChannel.h
index 80305c1ccee..ab86c32f85a 100644
--- a/src/Loggers/OwnSplitChannel.h
+++ b/src/Loggers/OwnSplitChannel.h
@@ -10,7 +10,9 @@
 #ifndef WITHOUT_TEXT_LOG
 namespace DB
 {
-    class TextLog;
+    template <typename> class SystemLogQueue;
+    struct TextLogElement;
+    using FooBar = SystemLogQueue<TextLogElement>;
 }
 #endif
 
@@ -31,7 +33,7 @@ public:
     void addChannel(Poco::AutoPtr<Poco::Channel> channel, const std::string & name);
 
 #ifndef WITHOUT_TEXT_LOG
-    void addTextLog(std::shared_ptr<DB::TextLog> log, int max_priority);
+    void addTextLog(std::shared_ptr<DB::FooBar> log_queue, int max_priority);
 #endif
 
     void setLevel(const std::string & name, int level);
@@ -48,7 +50,7 @@ private:
     std::mutex text_log_mutex;
 
 #ifndef WITHOUT_TEXT_LOG
-    std::weak_ptr<DB::TextLog> text_log;
+    std::weak_ptr<DB::FooBar> text_log;
     std::atomic<int> text_log_max_priority = -1;
 #endif
 };

From 3649f104444076f7f5cc232fae8940cde8025d46 Mon Sep 17 00:00:00 2001
From: Dmitry Kardymon <d.kardymon@arenadata.io>
Date: Thu, 13 Jul 2023 05:19:18 +0000
Subject: [PATCH 069/242] Remove spaces

---
 src/Common/SystemLogBase.cpp | 4 +---
 src/Common/SystemLogBase.h   | 2 +-
 src/Interpreters/TextLog.h   | 4 +---
 3 files changed, 3 insertions(+), 7 deletions(-)

diff --git a/src/Common/SystemLogBase.cpp b/src/Common/SystemLogBase.cpp
index 28dc5ec2666..2d2c3e2053f 100644
--- a/src/Common/SystemLogBase.cpp
+++ b/src/Common/SystemLogBase.cpp
@@ -73,9 +73,7 @@ void SystemLogBase<LogElement>::startup()
 {
     std::lock_guard lock(queue->mutex);
     std::cout << "void ISystemLog::startup()" << std::endl;
-    saving_thread = std::make_unique<ThreadFromGlobalPool>([this] { 
-        std::cout << "void ISystemLog::ThreadFromGlobalPool()" << std::endl;
-        savingThreadFunction(); });
+    saving_thread = std::make_unique<ThreadFromGlobalPool>([this] { savingThreadFunction(); });
 }
 
 static thread_local bool recursive_add_call = false;
diff --git a/src/Common/SystemLogBase.h b/src/Common/SystemLogBase.h
index cb6003c94ef..221d9946b48 100644
--- a/src/Common/SystemLogBase.h
+++ b/src/Common/SystemLogBase.h
@@ -67,7 +67,7 @@ public:
     virtual void savingThreadFunction() = 0;
 
 protected:
-    std::unique_ptr<ThreadFromGlobalPool> saving_thread;    
+    std::unique_ptr<ThreadFromGlobalPool> saving_thread;
 };
 
 template <typename LogElement>
diff --git a/src/Interpreters/TextLog.h b/src/Interpreters/TextLog.h
index 9c761f9d865..33c38da2f8f 100644
--- a/src/Interpreters/TextLog.h
+++ b/src/Interpreters/TextLog.h
@@ -46,7 +46,7 @@ public:
         const String & table_name_,
         const String & storage_def_,
         size_t flush_interval_milliseconds_);
-    
+
     static std::shared_ptr<SystemLogQueue<TextLogElement>> getLogQueue()
     {
         static std::shared_ptr<SystemLogQueue<TextLogElement>> queue = std::make_shared<SystemLogQueue<TextLogElement>>();
@@ -54,6 +54,4 @@ public:
     }
 };
 
-
-
 }

From 220c0255abea1c4fbb5dd66a4fda350be9aa9023 Mon Sep 17 00:00:00 2001
From: Dmitry Kardymon <d.kardymon@arenadata.io>
Date: Thu, 13 Jul 2023 08:15:50 +0000
Subject: [PATCH 070/242] Remove mutex

---
 src/Common/SystemLogBase.cpp    |  1 -
 src/Loggers/Loggers.cpp         | 10 ++++++----
 src/Loggers/OwnSplitChannel.cpp |  6 +-----
 src/Loggers/OwnSplitChannel.h   |  2 --
 4 files changed, 7 insertions(+), 12 deletions(-)

diff --git a/src/Common/SystemLogBase.cpp b/src/Common/SystemLogBase.cpp
index 2d2c3e2053f..09f4c050f15 100644
--- a/src/Common/SystemLogBase.cpp
+++ b/src/Common/SystemLogBase.cpp
@@ -72,7 +72,6 @@ template <typename LogElement>
 void SystemLogBase<LogElement>::startup()
 {
     std::lock_guard lock(queue->mutex);
-    std::cout << "void ISystemLog::startup()" << std::endl;
     saving_thread = std::make_unique<ThreadFromGlobalPool>([this] { savingThreadFunction(); });
 }
 
diff --git a/src/Loggers/Loggers.cpp b/src/Loggers/Loggers.cpp
index 96793ee5e0a..2fda836e7b7 100644
--- a/src/Loggers/Loggers.cpp
+++ b/src/Loggers/Loggers.cpp
@@ -37,7 +37,6 @@ static std::string createDirectory(const std::string & file)
 
 void Loggers::buildLoggers(Poco::Util::AbstractConfiguration & config, Poco::Logger & logger /*_root*/, const std::string & cmd_name)
 {
-
     auto current_logger = config.getString("logger", "");
     if (config_logger.has_value() && *config_logger == current_logger)
         return;
@@ -51,9 +50,12 @@ void Loggers::buildLoggers(Poco::Util::AbstractConfiguration & config, Poco::Log
     split = new DB::OwnSplitChannel();
 
 #ifndef WITHOUT_TEXT_LOG
-    String text_log_level_str = config.getString("text_log.level", "");
-    int text_log_level = text_log_level_str.empty() ? INT_MAX : Poco::Logger::parseLevel(text_log_level_str);
-    split->addTextLog(DB::TextLog::getLogQueue(), text_log_level);
+    if (config.has("text_log"))
+    {
+        String text_log_level_str = config.getString("text_log.level", "");
+        int text_log_level = text_log_level_str.empty() ? INT_MAX : Poco::Logger::parseLevel(text_log_level_str);
+        split->addTextLog(DB::TextLog::getLogQueue(), text_log_level);
+    }
 #endif
 
     auto log_level_string = config.getString("logger.level", "trace");
diff --git a/src/Loggers/OwnSplitChannel.cpp b/src/Loggers/OwnSplitChannel.cpp
index 7661996ea33..17806153905 100644
--- a/src/Loggers/OwnSplitChannel.cpp
+++ b/src/Loggers/OwnSplitChannel.cpp
@@ -136,10 +136,7 @@ void OwnSplitChannel::logSplit(const Poco::Message & msg)
         elem.message_format_string = msg.getFormatString();
 
         std::shared_ptr<SystemLogQueue<TextLogElement>> text_log_locked{};
-        {
-            std::lock_guard lock(text_log_mutex);
-            text_log_locked = text_log.lock();
-        }
+        text_log_locked = text_log.lock();
         if (text_log_locked)
             text_log_locked->add(elem);
     }
@@ -155,7 +152,6 @@ void OwnSplitChannel::addChannel(Poco::AutoPtr<Poco::Channel> channel, const std
 #ifndef WITHOUT_TEXT_LOG
 void OwnSplitChannel::addTextLog(std::shared_ptr<SystemLogQueue<TextLogElement>> log_queue, int max_priority)
 {
-    std::lock_guard lock(text_log_mutex);
     text_log = log_queue;
     text_log_max_priority.store(max_priority, std::memory_order_relaxed);
 }
diff --git a/src/Loggers/OwnSplitChannel.h b/src/Loggers/OwnSplitChannel.h
index ab86c32f85a..155d0872465 100644
--- a/src/Loggers/OwnSplitChannel.h
+++ b/src/Loggers/OwnSplitChannel.h
@@ -47,8 +47,6 @@ private:
     using ExtendedChannelPtrPair = std::pair<ChannelPtr, ExtendedLogChannel *>;
     std::map<std::string, ExtendedChannelPtrPair> channels;
 
-    std::mutex text_log_mutex;
-
 #ifndef WITHOUT_TEXT_LOG
     std::weak_ptr<DB::FooBar> text_log;
     std::atomic<int> text_log_max_priority = -1;

From 4a10c7286e9fa95694284968be1f39fffcb29b6c Mon Sep 17 00:00:00 2001
From: Dmitry Kardymon <d.kardymon@arenadata.io>
Date: Thu, 13 Jul 2023 12:41:17 +0000
Subject: [PATCH 071/242] Add name to logger in queue

---
 src/Common/SystemLogBase.cpp   | 19 ++++++++++++++-----
 src/Common/SystemLogBase.h     |  5 ++++-
 src/Interpreters/SystemLog.cpp |  4 ++--
 src/Interpreters/TextLog.h     |  2 +-
 src/Loggers/Loggers.cpp        | 17 ++++++++---------
 5 files changed, 29 insertions(+), 18 deletions(-)

diff --git a/src/Common/SystemLogBase.cpp b/src/Common/SystemLogBase.cpp
index 09f4c050f15..8285f133d73 100644
--- a/src/Common/SystemLogBase.cpp
+++ b/src/Common/SystemLogBase.cpp
@@ -39,12 +39,14 @@ namespace
 ISystemLog::~ISystemLog() = default;
 
 template <typename LogElement>
-SystemLogBase<LogElement>::SystemLogBase(std::shared_ptr<SystemLogQueue<LogElement>> ex_queue)
+SystemLogBase<LogElement>::SystemLogBase(
+    const String & name_,
+    std::shared_ptr<SystemLogQueue<LogElement>> queue_)
 {
-    if (ex_queue)
-        queue = ex_queue;
+    if (queue_)
+        queue = queue_;
     else
-        queue = std::make_shared<SystemLogQueue<LogElement>>();
+        queue = std::make_shared<SystemLogQueue<LogElement>>(name_);
 }
 
 template <typename LogElement>
@@ -75,6 +77,11 @@ void SystemLogBase<LogElement>::startup()
     saving_thread = std::make_unique<ThreadFromGlobalPool>([this] { savingThreadFunction(); });
 }
 
+template <typename LogElement>
+SystemLogQueue<LogElement>::SystemLogQueue(const String & name_)
+    : log(&Poco::Logger::get(name_))
+{}
+
 static thread_local bool recursive_add_call = false;
 
 template <typename LogElement>
@@ -92,7 +99,6 @@ void SystemLogQueue<LogElement>::add(const LogElement & element)
     /// Otherwise the tests like 01017_uniqCombined_memory_usage.sql will be flacky.
     MemoryTrackerBlockerInThread temporarily_disable_memory_tracker;
 
-
     /// Should not log messages under mutex.
     bool queue_is_half_full = false;
 
@@ -194,4 +200,7 @@ void SystemLogBase<LogElement>::flush(bool force)
 #define INSTANTIATE_SYSTEM_LOG_BASE(ELEMENT) template class SystemLogBase<ELEMENT>;
 SYSTEM_LOG_ELEMENTS(INSTANTIATE_SYSTEM_LOG_BASE)
 
+#define INSTANTIATE_SYSTEM_LOG_BASE2(ELEMENT) template class SystemLogQueue<ELEMENT>;
+SYSTEM_LOG_ELEMENTS(INSTANTIATE_SYSTEM_LOG_BASE2)
+
 }
diff --git a/src/Common/SystemLogBase.h b/src/Common/SystemLogBase.h
index 221d9946b48..3b1b848369b 100644
--- a/src/Common/SystemLogBase.h
+++ b/src/Common/SystemLogBase.h
@@ -74,6 +74,7 @@ template <typename LogElement>
 class SystemLogQueue
 {
 public:
+    SystemLogQueue(const String & name_);
 
     void add(const LogElement & element);
     size_t size() const { return queue.size(); }
@@ -109,7 +110,9 @@ class SystemLogBase : public ISystemLog
 public:
     using Self = SystemLogBase;
 
-    SystemLogBase(std::shared_ptr<SystemLogQueue<LogElement>> ex_queue = nullptr);
+    SystemLogBase(
+        const String & name_,
+        std::shared_ptr<SystemLogQueue<LogElement>> queue_ = nullptr);
 
     /** Append a record into log.
       * Writing to table will be done asynchronously and in case of failure, record could be lost.
diff --git a/src/Interpreters/SystemLog.cpp b/src/Interpreters/SystemLog.cpp
index cbf355d020a..b77cb2311d5 100644
--- a/src/Interpreters/SystemLog.cpp
+++ b/src/Interpreters/SystemLog.cpp
@@ -333,8 +333,8 @@ SystemLog<LogElement>::SystemLog(
     const String & table_name_,
     const String & storage_def_,
     size_t flush_interval_milliseconds_,
-    std::shared_ptr<SystemLogQueue<LogElement>> ex_queue)
-    : Base(ex_queue)
+    std::shared_ptr<SystemLogQueue<LogElement>> queue_)
+    : Base(database_name_ + "." + table_name_, queue_)
     , WithContext(context_)
     , table_id(database_name_, table_name_)
     , storage_def(storage_def_)
diff --git a/src/Interpreters/TextLog.h b/src/Interpreters/TextLog.h
index 33c38da2f8f..8390259e147 100644
--- a/src/Interpreters/TextLog.h
+++ b/src/Interpreters/TextLog.h
@@ -49,7 +49,7 @@ public:
 
     static std::shared_ptr<SystemLogQueue<TextLogElement>> getLogQueue()
     {
-        static std::shared_ptr<SystemLogQueue<TextLogElement>> queue = std::make_shared<SystemLogQueue<TextLogElement>>();
+        static std::shared_ptr<SystemLogQueue<TextLogElement>> queue = std::make_shared<SystemLogQueue<TextLogElement>>("text_log");
         return queue;
     }
 };
diff --git a/src/Loggers/Loggers.cpp b/src/Loggers/Loggers.cpp
index 2fda836e7b7..fa143440cc2 100644
--- a/src/Loggers/Loggers.cpp
+++ b/src/Loggers/Loggers.cpp
@@ -49,15 +49,6 @@ void Loggers::buildLoggers(Poco::Util::AbstractConfiguration & config, Poco::Log
     /// Use extended interface of Channel for more comprehensive logging.
     split = new DB::OwnSplitChannel();
 
-#ifndef WITHOUT_TEXT_LOG
-    if (config.has("text_log"))
-    {
-        String text_log_level_str = config.getString("text_log.level", "");
-        int text_log_level = text_log_level_str.empty() ? INT_MAX : Poco::Logger::parseLevel(text_log_level_str);
-        split->addTextLog(DB::TextLog::getLogQueue(), text_log_level);
-    }
-#endif
-
     auto log_level_string = config.getString("logger.level", "trace");
 
     /// different channels (log, console, syslog) may have different loglevels configured
@@ -258,6 +249,14 @@ void Loggers::buildLoggers(Poco::Util::AbstractConfiguration & config, Poco::Log
             }
         }
     }
+#ifndef WITHOUT_TEXT_LOG
+    if (config.has("text_log"))
+    {
+        String text_log_level_str = config.getString("text_log.level", "");
+        int text_log_level = text_log_level_str.empty() ? INT_MAX : Poco::Logger::parseLevel(text_log_level_str);
+        split->addTextLog(DB::TextLog::getLogQueue(), text_log_level);
+    }
+#endif
 }
 
 void Loggers::updateLevels(Poco::Util::AbstractConfiguration & config, Poco::Logger & logger)

From 2f17208792363f73db8fb750aa6e3e6f907bac37 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Thu, 13 Jul 2023 20:38:17 +0200
Subject: [PATCH 072/242] Add missing --force for docker network prune
 (otherwise it is noop on CI)

On CI there is no stdin attached, so this command simply does nothing; fix
this.

Fixes: https://github.com/ClickHouse/ClickHouse/pull/51969
Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com>
---
 tests/integration/conftest.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py
index 5933883f7b0..a4e25e653b3 100644
--- a/tests/integration/conftest.py
+++ b/tests/integration/conftest.py
@@ -45,7 +45,7 @@ def cleanup_environment():
 
         logging.debug("Pruning Docker networks")
         run_and_check(
-            ["docker network prune"],
+            ["docker network prune --force"],
             shell=True,
             nothrow=True,
         )

From f153d6aa3c69b5d7a6e6df87ffdb84b1632e5abe Mon Sep 17 00:00:00 2001
From: Dmitry Kardymon <d.kardymon@arenadata.io>
Date: Fri, 14 Jul 2023 10:02:18 +0000
Subject: [PATCH 073/242] Fix shutdown

---
 src/Common/SystemLogBase.cpp  | 13 +++++++------
 src/Common/SystemLogBase.h    |  8 ++++----
 src/Loggers/OwnSplitChannel.h |  6 +++---
 3 files changed, 14 insertions(+), 13 deletions(-)

diff --git a/src/Common/SystemLogBase.cpp b/src/Common/SystemLogBase.cpp
index 211bd457714..7373786e514 100644
--- a/src/Common/SystemLogBase.cpp
+++ b/src/Common/SystemLogBase.cpp
@@ -62,6 +62,7 @@ void SystemLogBase<LogElement>::stopFlushThread()
             return;
 
         is_shutdown = true;
+        queue->shutdown();
 
         /// Tell thread to shutdown.
         queue->flush_event.notify_all();
@@ -105,8 +106,8 @@ void SystemLogQueue<LogElement>::add(const LogElement & element)
     {
         std::unique_lock lock(mutex);
 
-        // if (queue.is_shutdown)
-        //     return;              // TODO
+        if (is_shutdown)
+            return;
 
         if (queue.size() == DBMS_SYSTEM_LOG_QUEUE_SIZE / 2)
         {
@@ -191,18 +192,18 @@ uint64_t SystemLogBase<LogElement>::notifyFlushImpl(bool force)
     uint64_t this_thread_requested_offset;
 
     {
-        std::lock_guard lock(mutex);
+        std::lock_guard lock(queue->mutex);
         if (is_shutdown)
             return uint64_t(-1);
 
-        this_thread_requested_offset = queue_front_index + queue.size();
+        this_thread_requested_offset = queue->queue_front_index + queue->queue.size();
 
         // Publish our flush request, taking care not to overwrite the requests
         // made by other threads.
         is_force_prepare_tables |= force;
-        requested_flush_up_to = std::max(requested_flush_up_to, this_thread_requested_offset);
+        queue->requested_flush_up_to = std::max(queue->requested_flush_up_to, this_thread_requested_offset);
 
-        flush_event.notify_all();
+        queue->flush_event.notify_all();
     }
 
     LOG_DEBUG(log, "Requested flush up to offset {}", this_thread_requested_offset);
diff --git a/src/Common/SystemLogBase.h b/src/Common/SystemLogBase.h
index cf6efc68119..109334964d2 100644
--- a/src/Common/SystemLogBase.h
+++ b/src/Common/SystemLogBase.h
@@ -78,10 +78,8 @@ public:
 
     void add(const LogElement & element);
     size_t size() const { return queue.size(); }
-    void push_back(const LogElement & element)
-    {
-        queue.push_back(element);
-    }
+    //void push_back(const LogElement & element) { queue.push_back(element); }
+    void shutdown() { is_shutdown = true; }
 
     // Queue is bounded. But its size is quite large to not block in all normal cases.
     std::vector<LogElement> queue;
@@ -101,7 +99,9 @@ public:
     // Logged overflow message at this queue front index
     uint64_t logged_queue_full_at_index = -1;
 
+private:
     Poco::Logger * log;
+    bool is_shutdown = false;
 };
 
 template <typename LogElement>
diff --git a/src/Loggers/OwnSplitChannel.h b/src/Loggers/OwnSplitChannel.h
index 155d0872465..a6ee8af5b14 100644
--- a/src/Loggers/OwnSplitChannel.h
+++ b/src/Loggers/OwnSplitChannel.h
@@ -12,7 +12,7 @@ namespace DB
 {
     template <typename> class SystemLogQueue;
     struct TextLogElement;
-    using FooBar = SystemLogQueue<TextLogElement>;
+    using TextLogQueue = SystemLogQueue<TextLogElement>;
 }
 #endif
 
@@ -33,7 +33,7 @@ public:
     void addChannel(Poco::AutoPtr<Poco::Channel> channel, const std::string & name);
 
 #ifndef WITHOUT_TEXT_LOG
-    void addTextLog(std::shared_ptr<DB::FooBar> log_queue, int max_priority);
+    void addTextLog(std::shared_ptr<DB::TextLogQueue> log_queue, int max_priority);
 #endif
 
     void setLevel(const std::string & name, int level);
@@ -48,7 +48,7 @@ private:
     std::map<std::string, ExtendedChannelPtrPair> channels;
 
 #ifndef WITHOUT_TEXT_LOG
-    std::weak_ptr<DB::FooBar> text_log;
+    std::weak_ptr<DB::TextLogQueue> text_log;
     std::atomic<int> text_log_max_priority = -1;
 #endif
 };

From 57ee7916cc348074f52940b225f703f473d972cb Mon Sep 17 00:00:00 2001
From: Dmitry Kardymon <d.kardymon@arenadata.io>
Date: Fri, 14 Jul 2023 12:01:26 +0000
Subject: [PATCH 074/242] Add tests to nullable types

---
 tests/queries/0_stateless/02809_has_subsequence.reference | 7 +++++++
 tests/queries/0_stateless/02809_has_subsequence.sql       | 8 ++++++++
 2 files changed, 15 insertions(+)

diff --git a/tests/queries/0_stateless/02809_has_subsequence.reference b/tests/queries/0_stateless/02809_has_subsequence.reference
index 5f533d7f5bb..66da41ccc87 100644
--- a/tests/queries/0_stateless/02809_has_subsequence.reference
+++ b/tests/queries/0_stateless/02809_has_subsequence.reference
@@ -55,3 +55,10 @@ hasSubsequenceCaseInsensitiveUTF8
 0
 1
 0
+Nullable
+\N
+\N
+\N
+1
+1
+1
diff --git a/tests/queries/0_stateless/02809_has_subsequence.sql b/tests/queries/0_stateless/02809_has_subsequence.sql
index bcc491a95fe..13b92164cf5 100644
--- a/tests/queries/0_stateless/02809_has_subsequence.sql
+++ b/tests/queries/0_stateless/02809_has_subsequence.sql
@@ -58,3 +58,11 @@ select hasSubsequenceCaseInsensitiveUTF8('для онлайн обработки
 select hasSubsequenceCaseInsensitiveUTF8('для онлайн обработки аналитических запросов (OLAP)', materialize('зло()'));
 select hasSubsequenceCaseInsensitiveUTF8(materialize('для онлайн обработки аналитических запросов (OLAP)'), materialize('аналитика'));
 select hasSubsequenceCaseInsensitiveUTF8(materialize('для онлайн обработки аналитических запросов (OLAP)'), materialize('аналитика для аналитиков'));
+
+select 'Nullable';
+select hasSubsequence(Null, Null);
+select hasSubsequence(Null, 'a');
+select hasSubsequence(Null::Nullable(String), 'arg'::Nullable(String));
+select hasSubsequence('garbage'::Nullable(String), 'a');
+select hasSubsequence('garbage'::Nullable(String), 'arg'::Nullable(String));
+select hasSubsequence(materialize('garbage'::Nullable(String)), materialize('arg'::Nullable(String)));
\ No newline at end of file

From 4f6d59ce7f2eaf032a8e833153da909b155bf3ae Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Fri, 14 Jul 2023 05:57:46 +0200
Subject: [PATCH 075/242] tests: drop existing view in
 test_materialized_mysql_database

This should avoid failures of other tests in case some previous test
failed.

Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com>
---
 .../test_materialized_mysql_database/materialized_with_ddl.py    | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/integration/test_materialized_mysql_database/materialized_with_ddl.py b/tests/integration/test_materialized_mysql_database/materialized_with_ddl.py
index 8cf9e67bf63..73f6e11d7f8 100644
--- a/tests/integration/test_materialized_mysql_database/materialized_with_ddl.py
+++ b/tests/integration/test_materialized_mysql_database/materialized_with_ddl.py
@@ -992,6 +992,7 @@ def select_without_columns(clickhouse_node, mysql_node, service_name):
     )
     check_query(clickhouse_node, "SHOW TABLES FROM db FORMAT TSV", "t\n")
     clickhouse_node.query("SYSTEM STOP MERGES db.t")
+    clickhouse_node.query("DROP VIEW v IF EXISTS")
     clickhouse_node.query("CREATE VIEW v AS SELECT * FROM db.t")
     mysql_node.query("INSERT INTO db.t VALUES (1, 1), (2, 2)")
     mysql_node.query("DELETE FROM db.t WHERE a = 2;")

From 527d77bc0fb37dd7af6d39e9f490f8621da3032e Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@clickhouse.com>
Date: Fri, 14 Jul 2023 21:12:12 +0200
Subject: [PATCH 076/242] system.licenses table will display hard forks

---
 .../0_stateless/02813_system_licenses_base.reference       | 7 +++++++
 tests/queries/0_stateless/02813_system_licenses_base.sql   | 1 +
 utils/list-licenses/list-licenses.sh                       | 4 ++--
 3 files changed, 10 insertions(+), 2 deletions(-)
 create mode 100644 tests/queries/0_stateless/02813_system_licenses_base.reference
 create mode 100644 tests/queries/0_stateless/02813_system_licenses_base.sql

diff --git a/tests/queries/0_stateless/02813_system_licenses_base.reference b/tests/queries/0_stateless/02813_system_licenses_base.reference
new file mode 100644
index 00000000000..89c42cc7a2f
--- /dev/null
+++ b/tests/queries/0_stateless/02813_system_licenses_base.reference
@@ -0,0 +1,7 @@
+Row 1:
+──────
+library_name: poco
+license_type: Boost
+license_path: /base/poco/LICENSE
+license_text: Boost Software License - Version 1.0 - August 17th, 2003
+
diff --git a/tests/queries/0_stateless/02813_system_licenses_base.sql b/tests/queries/0_stateless/02813_system_licenses_base.sql
new file mode 100644
index 00000000000..e4b2ca3d36b
--- /dev/null
+++ b/tests/queries/0_stateless/02813_system_licenses_base.sql
@@ -0,0 +1 @@
+SELECT * REPLACE substring(license_text, 1, position(license_text, '\n')) AS license_text FROM system.licenses WHERE library_name = 'poco' FORMAT Vertical;
diff --git a/utils/list-licenses/list-licenses.sh b/utils/list-licenses/list-licenses.sh
index dd23e6321c8..cee5cf87a08 100755
--- a/utils/list-licenses/list-licenses.sh
+++ b/utils/list-licenses/list-licenses.sh
@@ -12,7 +12,7 @@ fi
 ROOT_PATH="$(git rev-parse --show-toplevel)"
 LIBS_PATH="${ROOT_PATH}/contrib"
 
-ls -1 -d ${LIBS_PATH}/*/ | ${GREP_CMD} -F -v -- '-cmake' | LC_ALL=C sort | while read LIB; do
+ls -1 -d ${LIBS_PATH}/*/ "${ROOT_PATH}/base/poco" | ${GREP_CMD} -F -v -- '-cmake' | LC_ALL=C sort | while read LIB; do
     LIB_NAME=$(basename $LIB)
 
     LIB_LICENSE=$(
@@ -72,7 +72,7 @@ ls -1 -d ${LIBS_PATH}/*/ | ${GREP_CMD} -F -v -- '-cmake' | LC_ALL=C sort | while
          echo "HPND") ||
         echo "Unknown")
 
-        RELATIVE_PATH=$(echo "$LIB_LICENSE" | sed -r -e 's!^.+/contrib/!/contrib/!')
+        RELATIVE_PATH=$(echo "$LIB_LICENSE" | sed -r -e 's!^.+/(contrib|base)/!/\1/!')
 
         echo -e "$LIB_NAME\t$LICENSE_TYPE\t$RELATIVE_PATH"
     fi

From 386adfad3365d3026ec8a3fe11536eead780262d Mon Sep 17 00:00:00 2001
From: flynn <fenglv15@mails.ucas.ac.cn>
Date: Sat, 15 Jul 2023 16:21:58 +0000
Subject: [PATCH 077/242] Avro input format support Union with single type

---
 .../Formats/Impl/AvroRowInputFormat.cpp       |  28 ++++++++++++++----
 .../02813_avro_union_with_one_type.reference  |   5 ++++
 .../02813_avro_union_with_one_type.sh         |  13 ++++++++
 .../0_stateless/data_avro/union_one_type.avro | Bin 0 -> 304 bytes
 4 files changed, 41 insertions(+), 5 deletions(-)
 create mode 100644 tests/queries/0_stateless/02813_avro_union_with_one_type.reference
 create mode 100755 tests/queries/0_stateless/02813_avro_union_with_one_type.sh
 create mode 100644 tests/queries/0_stateless/data_avro/union_one_type.avro

diff --git a/src/Processors/Formats/Impl/AvroRowInputFormat.cpp b/src/Processors/Formats/Impl/AvroRowInputFormat.cpp
index 4cd73cb23b5..771247a983b 100644
--- a/src/Processors/Formats/Impl/AvroRowInputFormat.cpp
+++ b/src/Processors/Formats/Impl/AvroRowInputFormat.cpp
@@ -367,14 +367,25 @@ AvroDeserializer::DeserializeFn AvroDeserializer::createDeserializeFn(const avro
             break;
         case avro::AVRO_UNION:
         {
-            if (root_node->leaves() == 2
+            if (root_node->leaves() == 1)
+            {
+                auto nested_deserialize = createDeserializeFn(root_node->leafAt(0), target_type);
+                return [nested_deserialize](IColumn & column, avro::Decoder & decoder)
+                {
+                    decoder.decodeUnionIndex();
+                    nested_deserialize(column, decoder);
+                    return true;
+                };
+            }
+            /// FIXME Support UNION has more than two datatypes.
+            else if (
+                root_node->leaves() == 2
                 && (root_node->leafAt(0)->type() == avro::AVRO_NULL || root_node->leafAt(1)->type() == avro::AVRO_NULL))
             {
                 int non_null_union_index = root_node->leafAt(0)->type() == avro::AVRO_NULL ? 1 : 0;
                 if (target.isNullable())
                 {
-                    auto nested_deserialize = this->createDeserializeFn(
-                        root_node->leafAt(non_null_union_index), removeNullable(target_type));
+                    auto nested_deserialize = createDeserializeFn(root_node->leafAt(non_null_union_index), removeNullable(target_type));
                     return [non_null_union_index, nested_deserialize](IColumn & column, avro::Decoder & decoder)
                     {
                         ColumnNullable & col = assert_cast<ColumnNullable &>(column);
@@ -393,7 +404,7 @@ AvroDeserializer::DeserializeFn AvroDeserializer::createDeserializeFn(const avro
                 }
                 else if (null_as_default)
                 {
-                    auto nested_deserialize = this->createDeserializeFn(root_node->leafAt(non_null_union_index), target_type);
+                    auto nested_deserialize = createDeserializeFn(root_node->leafAt(non_null_union_index), target_type);
                     return [non_null_union_index, nested_deserialize](IColumn & column, avro::Decoder & decoder)
                     {
                         int union_index = static_cast<int>(decoder.decodeUnionIndex());
@@ -1169,12 +1180,19 @@ DataTypePtr AvroSchemaReader::avroNodeToDataType(avro::NodePtr node)
         case avro::Type::AVRO_NULL:
             return std::make_shared<DataTypeNothing>();
         case avro::Type::AVRO_UNION:
-            if (node->leaves() == 2 && (node->leafAt(0)->type() == avro::Type::AVRO_NULL || node->leafAt(1)->type() == avro::Type::AVRO_NULL))
+            if (node->leaves() == 1)
+            {
+                return avroNodeToDataType(node->leafAt(0));
+            }
+            else if (
+                node->leaves() == 2
+                && (node->leafAt(0)->type() == avro::Type::AVRO_NULL || node->leafAt(1)->type() == avro::Type::AVRO_NULL))
             {
                 int nested_leaf_index = node->leafAt(0)->type() == avro::Type::AVRO_NULL ? 1 : 0;
                 auto nested_type = avroNodeToDataType(node->leafAt(nested_leaf_index));
                 return nested_type->canBeInsideNullable() ? makeNullable(nested_type) : nested_type;
             }
+            /// FIXME Support UNION has more than two datatypes.
             throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Avro type  UNION is not supported for inserting.");
         case avro::Type::AVRO_SYMBOLIC:
             return avroNodeToDataType(avro::resolveSymbol(node));
diff --git a/tests/queries/0_stateless/02813_avro_union_with_one_type.reference b/tests/queries/0_stateless/02813_avro_union_with_one_type.reference
new file mode 100644
index 00000000000..c65bed48055
--- /dev/null
+++ b/tests/queries/0_stateless/02813_avro_union_with_one_type.reference
@@ -0,0 +1,5 @@
+name	String					
+favorite_number	Int32					
+favorite_color	String					
+Alyssa	256	yellow
+Ben	7	red
diff --git a/tests/queries/0_stateless/02813_avro_union_with_one_type.sh b/tests/queries/0_stateless/02813_avro_union_with_one_type.sh
new file mode 100755
index 00000000000..b58dc9126da
--- /dev/null
+++ b/tests/queries/0_stateless/02813_avro_union_with_one_type.sh
@@ -0,0 +1,13 @@
+#!/usr/bin/env bash
+# Tags: no-parallel, no-fasttest
+
+set -e
+
+CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+# shellcheck source=../shell_config.sh
+. "$CUR_DIR"/../shell_config.sh
+
+DATA_DIR=$CUR_DIR/data_avro
+
+$CLICKHOUSE_LOCAL -q "desc file('$DATA_DIR/union_one_type.avro')"
+$CLICKHOUSE_LOCAL -q "select * from file('$DATA_DIR/union_one_type.avro')"
diff --git a/tests/queries/0_stateless/data_avro/union_one_type.avro b/tests/queries/0_stateless/data_avro/union_one_type.avro
new file mode 100644
index 0000000000000000000000000000000000000000..07e6140e5e2f020fd44f59a674598d7c4c7e7214
GIT binary patch
literal 304
zcmeZI%3@>@Nh~YM*GtY%NloU+E6vFf1M`cMGg5OCXE9eRl~fj_Dp@Hg6{RNU7o{la
zC@AG6=7L2+i&KkW{NjSdWUydrMPhD2PO2Wr6p&<EW@=7KG0=c$gi*yMMVWc&Fs&fN
z!A92VD8SW3D`n=DD8(Y#mX=tSUzAyr8lP91n*_8NRW;N|tQwN@bMk@eV`~{UZmL}T
yaUXBT#K}+J`}mg_u~_jq=2R9JCo(j!Fz{5S=H%p;vpJ>aG4L_46{V)2D+d7n$7llp

literal 0
HcmV?d00001


From 0d48dca171c146871996046855dfa9790aa421e5 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Sat, 15 Jul 2023 20:36:16 +0300
Subject: [PATCH 078/242] Update materialized_with_ddl.py

---
 .../test_materialized_mysql_database/materialized_with_ddl.py   | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/integration/test_materialized_mysql_database/materialized_with_ddl.py b/tests/integration/test_materialized_mysql_database/materialized_with_ddl.py
index 73f6e11d7f8..8926c0e5c81 100644
--- a/tests/integration/test_materialized_mysql_database/materialized_with_ddl.py
+++ b/tests/integration/test_materialized_mysql_database/materialized_with_ddl.py
@@ -992,7 +992,7 @@ def select_without_columns(clickhouse_node, mysql_node, service_name):
     )
     check_query(clickhouse_node, "SHOW TABLES FROM db FORMAT TSV", "t\n")
     clickhouse_node.query("SYSTEM STOP MERGES db.t")
-    clickhouse_node.query("DROP VIEW v IF EXISTS")
+    clickhouse_node.query("DROP VIEW IF EXISTS v")
     clickhouse_node.query("CREATE VIEW v AS SELECT * FROM db.t")
     mysql_node.query("INSERT INTO db.t VALUES (1, 1), (2, 2)")
     mysql_node.query("DELETE FROM db.t WHERE a = 2;")

From c62089134049efb42fbf76a90c0199a7bcc8c491 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@clickhouse.com>
Date: Sun, 16 Jul 2023 03:14:44 +0200
Subject: [PATCH 079/242] Slightly more changes

---
 src/Interpreters/ExpressionAnalyzer.cpp | 46 ++++++++++++++-----------
 src/Interpreters/ExpressionAnalyzer.h   |  2 +-
 src/Interpreters/GetAggregatesVisitor.h | 10 +++---
 src/Interpreters/TreeRewriter.cpp       | 35 +++++++++----------
 src/Interpreters/TreeRewriter.h         |  4 +--
 5 files changed, 50 insertions(+), 47 deletions(-)

diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp
index 9a450fabd5b..9aee61eb8f0 100644
--- a/src/Interpreters/ExpressionAnalyzer.cpp
+++ b/src/Interpreters/ExpressionAnalyzer.cpp
@@ -548,15 +548,17 @@ void ExpressionAnalyzer::getRootActionsForWindowFunctions(const ASTPtr & ast, bo
 
 void ExpressionAnalyzer::makeAggregateDescriptions(ActionsDAGPtr & actions, AggregateDescriptions & descriptions)
 {
-    for (const ASTFunction * node : aggregates())
+    for (const ASTPtr & ast : aggregates())
     {
+        const ASTFunction & node = typeid_cast<const ASTFunction &>(*ast);
+
         AggregateDescription aggregate;
-        if (node->arguments)
-            getRootActionsNoMakeSet(node->arguments, actions);
+        if (node.arguments)
+            getRootActionsNoMakeSet(node.arguments, actions);
 
-        aggregate.column_name = node->getColumnName();
+        aggregate.column_name = node.getColumnName();
 
-        const ASTs & arguments = node->arguments ? node->arguments->children : ASTs();
+        const ASTs & arguments = node.arguments ? node.arguments->children : ASTs();
         aggregate.argument_names.resize(arguments.size());
         DataTypes types(arguments.size());
 
@@ -568,7 +570,7 @@ void ExpressionAnalyzer::makeAggregateDescriptions(ActionsDAGPtr & actions, Aggr
             {
                 throw Exception(ErrorCodes::UNKNOWN_IDENTIFIER,
                     "Unknown identifier '{}' in aggregate function '{}'",
-                    name, node->formatForErrorMessage());
+                    name, node.formatForErrorMessage());
             }
 
             types[i] = dag_node->result_type;
@@ -576,8 +578,8 @@ void ExpressionAnalyzer::makeAggregateDescriptions(ActionsDAGPtr & actions, Aggr
         }
 
         AggregateFunctionProperties properties;
-        aggregate.parameters = (node->parameters) ? getAggregateFunctionParametersArray(node->parameters, "", getContext()) : Array();
-        aggregate.function = AggregateFunctionFactory::instance().get(node->name, types, aggregate.parameters, properties);
+        aggregate.parameters = (node.parameters) ? getAggregateFunctionParametersArray(node.parameters, "", getContext()) : Array();
+        aggregate.function = AggregateFunctionFactory::instance().get(node.name, types, aggregate.parameters, properties);
 
         descriptions.push_back(aggregate);
     }
@@ -744,12 +746,13 @@ void ExpressionAnalyzer::makeWindowDescriptions(ActionsDAGPtr actions)
     }
 
     // Window functions
-    for (const ASTFunction * function_node : syntax->window_function_asts)
+    for (const ASTPtr & ast : syntax->window_function_asts)
     {
-        assert(function_node->is_window_function);
+        const ASTFunction & function_node = typeid_cast<const ASTFunction &>(*ast);
+        assert(function_node.is_window_function);
 
         WindowFunctionDescription window_function;
-        window_function.function_node = function_node;
+        window_function.function_node = &function_node;
         window_function.column_name
             = window_function.function_node->getColumnName();
         window_function.function_parameters
@@ -760,7 +763,7 @@ void ExpressionAnalyzer::makeWindowDescriptions(ActionsDAGPtr actions)
 
         // Requiring a constant reference to a shared pointer to non-const AST
         // doesn't really look sane, but the visitor does indeed require it.
-        // Hence we clone the node (not very sane either, I know).
+        // Hence, we clone the node (not very sane either, I know).
         getRootActionsNoMakeSet(window_function.function_node->clone(), actions);
 
         const ASTs & arguments
@@ -793,22 +796,22 @@ void ExpressionAnalyzer::makeWindowDescriptions(ActionsDAGPtr actions)
         // Find the window corresponding to this function. It may be either
         // referenced by name and previously defined in WINDOW clause, or it
         // may be defined inline.
-        if (!function_node->window_name.empty())
+        if (!function_node.window_name.empty())
         {
-            auto it = window_descriptions.find(function_node->window_name);
+            auto it = window_descriptions.find(function_node.window_name);
             if (it == std::end(window_descriptions))
             {
                 throw Exception(ErrorCodes::UNKNOWN_IDENTIFIER,
                     "Window '{}' is not defined (referenced by '{}')",
-                    function_node->window_name,
-                    function_node->formatForErrorMessage());
+                    function_node.window_name,
+                    function_node.formatForErrorMessage());
             }
 
             it->second.window_functions.push_back(window_function);
         }
         else
         {
-            const auto & definition = function_node->window_definition->as<
+            const auto & definition = function_node.window_definition->as<
                 const ASTWindowDefinition &>();
             WindowDescription desc;
             desc.window_name = definition.getDefaultWindowName();
@@ -1323,10 +1326,13 @@ void SelectQueryExpressionAnalyzer::appendAggregateFunctionsArguments(Expression
         GetAggregatesVisitor(data).visit(select_query->orderBy());
 
     /// TODO: data.aggregates -> aggregates()
-    for (const ASTFunction * node : data.aggregates)
-        if (node->arguments)
-            for (auto & argument : node->arguments->children)
+    for (const ASTPtr & ast : data.aggregates)
+    {
+        const ASTFunction & node = typeid_cast<const ASTFunction &>(*ast);
+        if (node.arguments)
+            for (auto & argument : node.arguments->children)
                 getRootActions(argument, only_types, step.actions());
+    }
 }
 
 void SelectQueryExpressionAnalyzer::appendWindowFunctionsArguments(
diff --git a/src/Interpreters/ExpressionAnalyzer.h b/src/Interpreters/ExpressionAnalyzer.h
index 271c3943afc..941194e69ff 100644
--- a/src/Interpreters/ExpressionAnalyzer.h
+++ b/src/Interpreters/ExpressionAnalyzer.h
@@ -168,7 +168,7 @@ protected:
     const ConstStoragePtr & storage() const { return syntax->storage; } /// The main table in FROM clause, if exists.
     const TableJoin & analyzedJoin() const { return *syntax->analyzed_join; }
     const NamesAndTypesList & sourceColumns() const { return syntax->required_source_columns; }
-    const std::vector<const ASTFunction *> & aggregates() const { return syntax->aggregates; }
+    const ASTs & aggregates() const { return syntax->aggregates; }
     /// Find global subqueries in the GLOBAL IN/JOIN sections. Fills in external_tables.
     void initGlobalSubqueriesAndExternalTables(bool do_global, bool is_explain);
 
diff --git a/src/Interpreters/GetAggregatesVisitor.h b/src/Interpreters/GetAggregatesVisitor.h
index fdf54de3e57..7bf6591af69 100644
--- a/src/Interpreters/GetAggregatesVisitor.h
+++ b/src/Interpreters/GetAggregatesVisitor.h
@@ -26,8 +26,8 @@ public:
         // Explicit empty initializers are needed to make designated initializers
         // work on GCC 10.
         std::unordered_set<String> uniq_names {};
-        std::vector<const ASTFunction *> aggregates {};
-        std::vector<const ASTFunction *> window_functions {};
+        ASTs aggregates;
+        ASTs window_functions;
     };
 
     static bool needChildVisit(const ASTPtr & node, const ASTPtr & child)
@@ -61,7 +61,7 @@ public:
     }
 
 private:
-    static void visit(const ASTFunction & node, const ASTPtr &, Data & data)
+    static void visit(const ASTFunction & node, const ASTPtr & ast, Data & data)
     {
         if (isAggregateFunction(node))
         {
@@ -74,7 +74,7 @@ private:
                 return;
 
             data.uniq_names.insert(column_name);
-            data.aggregates.push_back(&node);
+            data.aggregates.push_back(ast);
         }
         else if (node.is_window_function)
         {
@@ -87,7 +87,7 @@ private:
                 return;
 
             data.uniq_names.insert(column_name);
-            data.window_functions.push_back(&node);
+            data.window_functions.push_back(ast);
         }
     }
 
diff --git a/src/Interpreters/TreeRewriter.cpp b/src/Interpreters/TreeRewriter.cpp
index 65b5d950975..aa493a1b55d 100644
--- a/src/Interpreters/TreeRewriter.cpp
+++ b/src/Interpreters/TreeRewriter.cpp
@@ -731,7 +731,7 @@ void expandGroupByAll(ASTSelectQuery * select_query)
     select_query->setExpression(ASTSelectQuery::Expression::GROUP_BY, group_expression_list);
 }
 
-std::vector<const ASTFunction *> getAggregates(ASTPtr & query, const ASTSelectQuery & select_query)
+ASTs getAggregates(ASTPtr & query, const ASTSelectQuery & select_query)
 {
     /// There can not be aggregate functions inside the WHERE and PREWHERE.
     if (select_query.where())
@@ -743,11 +743,12 @@ std::vector<const ASTFunction *> getAggregates(ASTPtr & query, const ASTSelectQu
     GetAggregatesVisitor(data).visit(query);
 
     /// There can not be other aggregate functions within the aggregate functions.
-    for (const ASTFunction * node : data.aggregates)
+    for (const ASTPtr & ast : data.aggregates)
     {
-        if (node->arguments)
+        const ASTFunction & node = typeid_cast<const ASTFunction &>(*ast);
+        if (node.arguments)
         {
-            for (auto & arg : node->arguments->children)
+            for (auto & arg : node.arguments->children)
             {
                 assertNoAggregates(arg, "inside another aggregate function");
                 // We also can't have window functions inside aggregate functions,
@@ -759,7 +760,7 @@ std::vector<const ASTFunction *> getAggregates(ASTPtr & query, const ASTSelectQu
     return data.aggregates;
 }
 
-std::vector<const ASTFunction *> getWindowFunctions(ASTPtr & query, const ASTSelectQuery & select_query)
+ASTs getWindowFunctions(ASTPtr & query, const ASTSelectQuery & select_query)
 {
     /// There can not be window functions inside the WHERE, PREWHERE and HAVING
     if (select_query.having())
@@ -777,20 +778,16 @@ std::vector<const ASTFunction *> getWindowFunctions(ASTPtr & query, const ASTSel
     /// Window functions cannot be inside aggregates or other window functions.
     /// Aggregate functions can be inside window functions because they are
     /// calculated earlier.
-    for (const ASTFunction * node : data.window_functions)
+    for (const ASTPtr & ast : data.window_functions)
     {
-        if (node->arguments)
-        {
-            for (auto & arg : node->arguments->children)
-            {
-                assertNoWindows(arg, "inside another window function");
-            }
-        }
+        const ASTFunction & node = typeid_cast<const ASTFunction &>(*ast);
 
-        if (node->window_definition)
-        {
-            assertNoWindows(node->window_definition, "inside window definition");
-        }
+        if (node.arguments)
+            for (auto & arg : node.arguments->children)
+                assertNoWindows(arg, "inside another window function");
+
+        if (node.window_definition)
+            assertNoWindows(node.window_definition, "inside window definition");
     }
 
     return data.window_functions;
@@ -1357,8 +1354,8 @@ TreeRewriterResultPtr TreeRewriter::analyze(
         GetAggregatesVisitor(data).visit(query);
 
         /// There can not be other aggregate functions within the aggregate functions.
-        for (const ASTFunction * node : data.aggregates)
-            for (auto & arg : node->arguments->children)
+        for (const ASTPtr & node : data.aggregates)
+            for (auto & arg : typeid_cast<const ASTFunction &>(*node).arguments->children)
                 assertNoAggregates(arg, "inside another aggregate function");
         result.aggregates = data.aggregates;
     }
diff --git a/src/Interpreters/TreeRewriter.h b/src/Interpreters/TreeRewriter.h
index ea16c432d0f..206a63541a6 100644
--- a/src/Interpreters/TreeRewriter.h
+++ b/src/Interpreters/TreeRewriter.h
@@ -41,8 +41,8 @@ struct TreeRewriterResult
 
     Aliases aliases;
 
-    std::vector<const ASTFunction *> aggregates;
-    std::vector<const ASTFunction *> window_function_asts;
+    ASTs aggregates;
+    ASTs window_function_asts;
     ASTs expressions_with_window_function;
 
     /// Which column is needed to be ARRAY-JOIN'ed to get the specified.

From 5d0c1e83a355757da87ad99ef46a97c4be92ef29 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@clickhouse.com>
Date: Sun, 16 Jul 2023 03:14:58 +0200
Subject: [PATCH 080/242] Remove a whitespace

---
 tests/queries/0_stateless/02364_window_view_segfault.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/queries/0_stateless/02364_window_view_segfault.sh b/tests/queries/0_stateless/02364_window_view_segfault.sh
index 3def22f4a9e..9accf7f30c3 100755
--- a/tests/queries/0_stateless/02364_window_view_segfault.sh
+++ b/tests/queries/0_stateless/02364_window_view_segfault.sh
@@ -12,6 +12,6 @@ opts=(
 ${CLICKHOUSE_CLIENT} "${opts[@]}" --multiquery --multiline --query """
 DROP TABLE IF EXISTS mt ON CLUSTER test_shard_localhost;
 DROP TABLE IF EXISTS wv ON CLUSTER test_shard_localhost;
-CREATE TABLE mt  ON CLUSTER test_shard_localhost (a Int32, timestamp DateTime) ENGINE=MergeTree ORDER BY tuple();
+CREATE TABLE mt ON CLUSTER test_shard_localhost (a Int32, timestamp DateTime) ENGINE=MergeTree ORDER BY tuple();
 CREATE WINDOW VIEW wv ON CLUSTER test_shard_localhost TO input_deduplicated INNER ENGINE Memory WATERMARK=INTERVAL '1' SECOND AS SELECT count(a), hopStart(wid) AS w_start, hopEnd(wid) AS w_end FROM mt GROUP BY hop(timestamp, INTERVAL '3' SECOND, INTERVAL '5' SECOND) AS wid;
 """ 2>&1 | grep -q -e "Code: 344" -e "Code: 60" && echo 'ok' || echo 'fail' ||:

From 7716479a37559afefc285191f780f1f424460826 Mon Sep 17 00:00:00 2001
From: taiyang-li <654010905@qq.com>
Date: Mon, 17 Jul 2023 10:33:38 +0800
Subject: [PATCH 081/242] add comments for
 https://github.com/ClickHouse/ClickHouse/pull/52112

---
 src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp b/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp
index 899b84cc132..ad199abaf8f 100644
--- a/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp
+++ b/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp
@@ -233,6 +233,8 @@ namespace DB
             checkStatus(components_status, nested_column->getName(), format_name);
 
             /// Pass null null_map, because fillArrowArray will decide whether nested_type is nullable, if nullable, it will create a new null_map from nested_column
+	    /// Note that it is only needed by gluten(https://github.com/oap-project/gluten), because array type in gluten is by default nullable.
+	    /// And it does not influence the original ClickHouse logic, because null_map passed to fillArrowArrayWithArrayColumnData is always nullptr for ClickHouse doesn't allow nullable complex types including array type.
             fillArrowArray(column_name, nested_column, nested_type, nullptr, value_builder, format_name, offsets[array_idx - 1], offsets[array_idx], output_string_as_string, output_fixed_string_as_fixed_byte_array, dictionary_values);
         }
     }

From 8ea335aca7a102d5d06114799c2ada616b894ea8 Mon Sep 17 00:00:00 2001
From: taiyang-li <654010905@qq.com>
Date: Mon, 17 Jul 2023 10:43:13 +0800
Subject: [PATCH 082/242] update style

---
 src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp b/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp
index ad199abaf8f..f688efa3290 100644
--- a/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp
+++ b/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp
@@ -233,8 +233,8 @@ namespace DB
             checkStatus(components_status, nested_column->getName(), format_name);
 
             /// Pass null null_map, because fillArrowArray will decide whether nested_type is nullable, if nullable, it will create a new null_map from nested_column
-	    /// Note that it is only needed by gluten(https://github.com/oap-project/gluten), because array type in gluten is by default nullable.
-	    /// And it does not influence the original ClickHouse logic, because null_map passed to fillArrowArrayWithArrayColumnData is always nullptr for ClickHouse doesn't allow nullable complex types including array type.
+            /// Note that it is only needed by gluten(https://github.com/oap-project/gluten), because array type in gluten is by default nullable.
+            /// And it does not influence the original ClickHouse logic, because null_map passed to fillArrowArrayWithArrayColumnData is always nullptr for ClickHouse doesn't allow nullable complex types including array type.
             fillArrowArray(column_name, nested_column, nested_type, nullptr, value_builder, format_name, offsets[array_idx - 1], offsets[array_idx], output_string_as_string, output_fixed_string_as_fixed_byte_array, dictionary_values);
         }
     }

From 708b980d693c13362b9222a0bfa74d1ab691a9f3 Mon Sep 17 00:00:00 2001
From: Dmitry Kardymon <d.kardymon@arenadata.io>
Date: Mon, 17 Jul 2023 05:36:24 +0000
Subject: [PATCH 083/242] Fix arg name to fix build

---
 src/Interpreters/SystemLog.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Interpreters/SystemLog.h b/src/Interpreters/SystemLog.h
index bf41ff12d2a..3f5ad8a5b6c 100644
--- a/src/Interpreters/SystemLog.h
+++ b/src/Interpreters/SystemLog.h
@@ -109,7 +109,7 @@ public:
         const String & table_name_,
         const String & storage_def_,
         size_t flush_interval_milliseconds_,
-        std::shared_ptr<SystemLogQueue<LogElement>> ex_queue = nullptr);
+        std::shared_ptr<SystemLogQueue<LogElement>> queue_ = nullptr);
 
     void shutdown() override;
 

From 0917cf05881f65faddb950daee60f67f53448129 Mon Sep 17 00:00:00 2001
From: Duc Canh Le <duccanh.le@ahrefs.com>
Date: Mon, 17 Jul 2023 08:16:58 +0000
Subject: [PATCH 084/242] disable direct join for range dictionary

---
 src/Interpreters/JoinedTables.cpp             |  5 +++
 .../02815_range_dict_no_direct_join.reference |  6 ++++
 .../02815_range_dict_no_direct_join.sql       | 33 +++++++++++++++++++
 3 files changed, 44 insertions(+)
 create mode 100644 tests/queries/0_stateless/02815_range_dict_no_direct_join.reference
 create mode 100644 tests/queries/0_stateless/02815_range_dict_no_direct_join.sql

diff --git a/src/Interpreters/JoinedTables.cpp b/src/Interpreters/JoinedTables.cpp
index ee5c288afbb..29add31fd5d 100644
--- a/src/Interpreters/JoinedTables.cpp
+++ b/src/Interpreters/JoinedTables.cpp
@@ -337,6 +337,11 @@ std::shared_ptr<TableJoin> JoinedTables::makeTableJoin(const ASTSelectQuery & se
                     LOG_TRACE(&Poco::Logger::get("JoinedTables"), "Can't use dictionary join: dictionary '{}' was not found", dictionary_name);
                     return nullptr;
                 }
+                if (dictionary->getSpecialKeyType() == DictionarySpecialKeyType::Range)
+                {
+                    LOG_TRACE(&Poco::Logger::get("JoinedTables"), "Can't use dictionary join: dictionary '{}' is a range dictionary", dictionary_name);
+                    return nullptr;
+                }
 
                 auto dictionary_kv = std::dynamic_pointer_cast<const IKeyValueEntity>(dictionary);
                 table_join->setStorageJoin(dictionary_kv);
diff --git a/tests/queries/0_stateless/02815_range_dict_no_direct_join.reference b/tests/queries/0_stateless/02815_range_dict_no_direct_join.reference
new file mode 100644
index 00000000000..3576991f7a4
--- /dev/null
+++ b/tests/queries/0_stateless/02815_range_dict_no_direct_join.reference
@@ -0,0 +1,6 @@
+1	0.1
+1	0.2
+2	0.3
+2	0.4
+3	0.5
+3	0.6
diff --git a/tests/queries/0_stateless/02815_range_dict_no_direct_join.sql b/tests/queries/0_stateless/02815_range_dict_no_direct_join.sql
new file mode 100644
index 00000000000..249ffdf8089
--- /dev/null
+++ b/tests/queries/0_stateless/02815_range_dict_no_direct_join.sql
@@ -0,0 +1,33 @@
+CREATE TABLE discounts
+(
+    advertiser_id UInt64,
+    discount_start_date Date,
+    discount_end_date Nullable(Date),
+    amount Float64
+)
+ENGINE = Memory;
+
+INSERT INTO discounts VALUES (1, '2015-01-01', Null, 0.1);
+INSERT INTO discounts VALUES (1, '2015-01-15', Null, 0.2);
+INSERT INTO discounts VALUES (2, '2015-01-01', '2015-01-15', 0.3);
+INSERT INTO discounts VALUES (2, '2015-01-04', '2015-01-10', 0.4);
+INSERT INTO discounts VALUES (3, '1970-01-01', '2015-01-15', 0.5);
+INSERT INTO discounts VALUES (3, '1970-01-01', '2015-01-10', 0.6);
+
+CREATE DICTIONARY discounts_dict
+(
+    advertiser_id UInt64,
+    discount_start_date Date,
+    discount_end_date Nullable(Date),
+    amount Float64
+)
+PRIMARY KEY advertiser_id
+SOURCE(CLICKHOUSE(TABLE discounts))
+LIFETIME(MIN 600 MAX 900)
+LAYOUT(RANGE_HASHED(RANGE_LOOKUP_STRATEGY 'max'))
+RANGE(MIN discount_start_date MAX discount_end_date);
+
+CREATE TABLE ids (id UInt64) ENGINE = Memory;
+INSERT INTO ids SELECT * FROM numbers(10);
+
+SELECT id, amount FROM ids INNER JOIN discounts_dict ON id = advertiser_id SETTINGS join_algorithm = 'direct';
\ No newline at end of file

From 988b46e3f0563376fe981722d59150cfcf0287e5 Mon Sep 17 00:00:00 2001
From: kssenii <sumarokovakseniia@mail.ru>
Date: Mon, 17 Jul 2023 15:36:51 +0200
Subject: [PATCH 085/242] Fix

---
 ...etadataStorageFromStaticFilesWebServer.cpp | 49 +++-----------
 .../MetadataStorageFromStaticFilesWebServer.h |  3 +-
 .../ObjectStorages/Web/WebObjectStorage.cpp   | 64 ++++++++++++-------
 .../ObjectStorages/Web/WebObjectStorage.h     | 11 ++--
 4 files changed, 60 insertions(+), 67 deletions(-)

diff --git a/src/Disks/ObjectStorages/Web/MetadataStorageFromStaticFilesWebServer.cpp b/src/Disks/ObjectStorages/Web/MetadataStorageFromStaticFilesWebServer.cpp
index 59e66969ec0..fa07ef8590a 100644
--- a/src/Disks/ObjectStorages/Web/MetadataStorageFromStaticFilesWebServer.cpp
+++ b/src/Disks/ObjectStorages/Web/MetadataStorageFromStaticFilesWebServer.cpp
@@ -33,46 +33,18 @@ const std::string & MetadataStorageFromStaticFilesWebServer::getPath() const
 
 bool MetadataStorageFromStaticFilesWebServer::exists(const std::string & path) const
 {
-    fs::path fs_path(path);
-    if (fs_path.has_extension())
-        fs_path = fs_path.parent_path();
-
-    initializeIfNeeded(fs_path);
-
-    if (object_storage.files.empty())
-        return false;
-
-    if (object_storage.files.contains(path))
-        return true;
-
-    /// `object_storage.files` contains files + directories only inside `metadata_path / uuid_3_digit / uuid /`
-    /// (specific table files only), but we need to be able to also tell if `exists(<metadata_path>)`, for example.
-    auto it = std::lower_bound(
-        object_storage.files.begin(),
-        object_storage.files.end(),
-        path,
-        [](const auto & file, const std::string & path_) { return file.first < path_; }
-    );
-    if (it == object_storage.files.end())
-        return false;
-
-    if (startsWith(it->first, path)
-        || (it != object_storage.files.begin() && startsWith(std::prev(it)->first, path)))
-        return true;
-
-    return false;
+    return object_storage.exists(path);
 }
 
 void MetadataStorageFromStaticFilesWebServer::assertExists(const std::string & path) const
 {
-    initializeIfNeeded(path);
-
     if (!exists(path))
 #ifdef NDEBUG
         throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "There is no path {}", path);
 #else
     {
         std::string all_files;
+        std::shared_lock shared_lock(object_storage.metadata_mutex);
         for (const auto & [file, _] : object_storage.files)
         {
             if (!all_files.empty())
@@ -87,33 +59,40 @@ void MetadataStorageFromStaticFilesWebServer::assertExists(const std::string & p
 bool MetadataStorageFromStaticFilesWebServer::isFile(const std::string & path) const
 {
     assertExists(path);
+    std::shared_lock shared_lock(object_storage.metadata_mutex);
     return object_storage.files.at(path).type == WebObjectStorage::FileType::File;
 }
 
 bool MetadataStorageFromStaticFilesWebServer::isDirectory(const std::string & path) const
 {
     assertExists(path);
+    std::shared_lock shared_lock(object_storage.metadata_mutex);
     return object_storage.files.at(path).type == WebObjectStorage::FileType::Directory;
 }
 
 uint64_t MetadataStorageFromStaticFilesWebServer::getFileSize(const String & path) const
 {
     assertExists(path);
+    std::shared_lock shared_lock(object_storage.metadata_mutex);
     return object_storage.files.at(path).size;
 }
 
 StoredObjects MetadataStorageFromStaticFilesWebServer::getStorageObjects(const std::string & path) const
 {
     assertExists(path);
+
     auto fs_path = fs::path(object_storage.url) / path;
     std::string remote_path = fs_path.parent_path() / (escapeForFileName(fs_path.stem()) + fs_path.extension().string());
     remote_path = remote_path.substr(object_storage.url.size());
+
+    std::shared_lock shared_lock(object_storage.metadata_mutex);
     return {StoredObject(remote_path, object_storage.files.at(path).size, path)};
 }
 
 std::vector<std::string> MetadataStorageFromStaticFilesWebServer::listDirectory(const std::string & path) const
 {
     std::vector<std::string> result;
+    std::shared_lock shared_lock(object_storage.metadata_mutex);
     for (const auto & [file_path, _] : object_storage.files)
     {
         if (file_path.starts_with(path))
@@ -122,22 +101,14 @@ std::vector<std::string> MetadataStorageFromStaticFilesWebServer::listDirectory(
     return result;
 }
 
-void MetadataStorageFromStaticFilesWebServer::initializeIfNeeded(const std::string & path) const
-{
-    if (object_storage.files.find(path) == object_storage.files.end())
-    {
-        object_storage.initialize(fs::path(object_storage.url) / path);
-    }
-}
-
 DirectoryIteratorPtr MetadataStorageFromStaticFilesWebServer::iterateDirectory(const std::string & path) const
 {
     std::vector<fs::path> dir_file_paths;
 
-    initializeIfNeeded(path);
     if (!exists(path))
         return std::make_unique<StaticDirectoryIterator>(std::move(dir_file_paths));
 
+    std::shared_lock shared_lock(object_storage.metadata_mutex);
     for (const auto & [file_path, _] : object_storage.files)
     {
         if (fs::path(parentPath(file_path)) / "" == fs::path(path) / "")
diff --git a/src/Disks/ObjectStorages/Web/MetadataStorageFromStaticFilesWebServer.h b/src/Disks/ObjectStorages/Web/MetadataStorageFromStaticFilesWebServer.h
index a04a1359d34..96c749ad80c 100644
--- a/src/Disks/ObjectStorages/Web/MetadataStorageFromStaticFilesWebServer.h
+++ b/src/Disks/ObjectStorages/Web/MetadataStorageFromStaticFilesWebServer.h
@@ -13,13 +13,14 @@ class MetadataStorageFromStaticFilesWebServer final : public IMetadataStorage
 {
 private:
     friend class MetadataStorageFromStaticFilesWebServerTransaction;
+    using FileType = WebObjectStorage::FileType;
 
     const WebObjectStorage & object_storage;
     std::string root_path;
 
     void assertExists(const std::string & path) const;
 
-    void initializeIfNeeded(const std::string & path) const;
+    void initializeImpl(const String & uri_path, const std::unique_lock<std::shared_mutex> &) const;
 
 public:
     explicit MetadataStorageFromStaticFilesWebServer(const WebObjectStorage & object_storage_);
diff --git a/src/Disks/ObjectStorages/Web/WebObjectStorage.cpp b/src/Disks/ObjectStorages/Web/WebObjectStorage.cpp
index 690a0d3372c..755ac0a20f9 100644
--- a/src/Disks/ObjectStorages/Web/WebObjectStorage.cpp
+++ b/src/Disks/ObjectStorages/Web/WebObjectStorage.cpp
@@ -31,7 +31,7 @@ namespace ErrorCodes
     extern const int NETWORK_ERROR;
 }
 
-void WebObjectStorage::initialize(const String & uri_path) const
+void WebObjectStorage::initialize(const String & uri_path, const std::unique_lock<std::shared_mutex> & lock) const
 {
     std::vector<String> directories_to_load;
     LOG_TRACE(log, "Loading metadata for directory: {}", uri_path);
@@ -81,8 +81,9 @@ void WebObjectStorage::initialize(const String & uri_path) const
             }
 
             file_path = file_path.substr(url.size());
-            files.emplace(std::make_pair(file_path, file_data));
             LOG_TRACE(&Poco::Logger::get("DiskWeb"), "Adding file: {}, size: {}", file_path, file_data.size);
+
+            files.emplace(std::make_pair(file_path, file_data));
         }
 
         files.emplace(std::make_pair(dir_name, FileData({ .type = FileType::Directory })));
@@ -103,7 +104,7 @@ void WebObjectStorage::initialize(const String & uri_path) const
     }
 
     for (const auto & directory_path : directories_to_load)
-        initialize(directory_path);
+        initialize(directory_path, lock);
 }
 
 
@@ -118,31 +119,50 @@ WebObjectStorage::WebObjectStorage(
 
 bool WebObjectStorage::exists(const StoredObject & object) const
 {
-    const auto & path = object.remote_path;
+    return exists(object.remote_path);
+}
 
+bool WebObjectStorage::exists(const std::string & path) const
+{
     LOG_TRACE(&Poco::Logger::get("DiskWeb"), "Checking existence of path: {}", path);
 
-    if (files.find(path) != files.end())
+    std::shared_lock shared_lock(metadata_mutex);
+
+    if (files.find(path) == files.end())
+    {
+        shared_lock.unlock();
+        std::unique_lock unique_lock(metadata_mutex);
+        if (files.find(path) == files.end())
+        {
+            fs::path index_file_dir = fs::path(url) / path;
+            if (index_file_dir.has_extension())
+                index_file_dir = index_file_dir.parent_path();
+
+            initialize(index_file_dir, unique_lock);
+        }
+        unique_lock.unlock();
+        shared_lock.lock();
+    }
+
+    if (files.empty())
+        return false;
+
+    if (files.contains(path))
         return true;
 
-    if (path.ends_with(MergeTreeData::FORMAT_VERSION_FILE_NAME) && files.find(fs::path(path).parent_path() / "") == files.end())
-    {
-        try
-        {
-            initialize(fs::path(url) / fs::path(path).parent_path());
-            return files.find(path) != files.end();
-        }
-        catch (...)
-        {
-            const auto message = getCurrentExceptionMessage(false);
-            bool can_throw = CurrentThread::isInitialized() && CurrentThread::get().getQueryContext();
-            if (can_throw)
-                throw Exception(ErrorCodes::NETWORK_ERROR, "Cannot load disk metadata. Error: {}", message);
+    /// `object_storage.files` contains files + directories only inside `metadata_path / uuid_3_digit / uuid /`
+    /// (specific table files only), but we need to be able to also tell if `exists(<metadata_path>)`, for example.
+    auto it = std::lower_bound(
+        files.begin(), files.end(), path,
+        [](const auto & file, const std::string & path_) { return file.first < path_; }
+    );
 
-            LOG_TRACE(&Poco::Logger::get("DiskWeb"), "Cannot load disk metadata. Error: {}", message);
-            return false;
-        }
-    }
+    if (it == files.end())
+        return false;
+
+    if (startsWith(it->first, path)
+        || (it != files.begin() && startsWith(std::prev(it)->first, path)))
+        return true;
 
     return false;
 }
diff --git a/src/Disks/ObjectStorages/Web/WebObjectStorage.h b/src/Disks/ObjectStorages/Web/WebObjectStorage.h
index e85b7224892..1a21d94e230 100644
--- a/src/Disks/ObjectStorages/Web/WebObjectStorage.h
+++ b/src/Disks/ObjectStorages/Web/WebObjectStorage.h
@@ -3,6 +3,7 @@
 #include "config.h"
 
 #include <Disks/ObjectStorages/IObjectStorage.h>
+#include <shared_mutex>
 
 namespace Poco
 {
@@ -93,9 +94,8 @@ public:
     bool isReadOnly() const override { return true; }
 
 protected:
-    void initialize(const String & uri_path) const;
-
     [[noreturn]] static void throwNotAllowed();
+    bool exists(const std::string & path) const;
 
     enum class FileType
     {
@@ -111,12 +111,13 @@ protected:
 
     using Files = std::map<String, FileData>; /// file path -> file data
     mutable Files files;
-
-    String url;
+    mutable std::shared_mutex metadata_mutex;
 
 private:
-    Poco::Logger * log;
+    void initialize(const String & path, const std::unique_lock<std::shared_mutex> &) const;
 
+    const String url;
+    Poco::Logger * log;
     size_t min_bytes_for_seek;
 };
 

From 13b493a353780a03f280dbd21df34e48964dc7bf Mon Sep 17 00:00:00 2001
From: kssenii <sumarokovakseniia@mail.ru>
Date: Mon, 17 Jul 2023 15:53:47 +0200
Subject: [PATCH 086/242] Add comment

---
 src/Disks/ObjectStorages/Web/WebObjectStorage.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/Disks/ObjectStorages/Web/WebObjectStorage.cpp b/src/Disks/ObjectStorages/Web/WebObjectStorage.cpp
index 755ac0a20f9..502ca8da8c2 100644
--- a/src/Disks/ObjectStorages/Web/WebObjectStorage.cpp
+++ b/src/Disks/ObjectStorages/Web/WebObjectStorage.cpp
@@ -140,6 +140,7 @@ bool WebObjectStorage::exists(const std::string & path) const
 
             initialize(index_file_dir, unique_lock);
         }
+        /// Files are never deleted from `files` as disk is read only, so no worry that we unlock now.
         unique_lock.unlock();
         shared_lock.lock();
     }

From d11ded952e3e22f576d892ffe46db44920cefea4 Mon Sep 17 00:00:00 2001
From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com>
Date: Mon, 17 Jul 2023 16:09:11 +0200
Subject: [PATCH 087/242] Update WebObjectStorage.cpp

---
 src/Disks/ObjectStorages/Web/WebObjectStorage.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/Disks/ObjectStorages/Web/WebObjectStorage.cpp b/src/Disks/ObjectStorages/Web/WebObjectStorage.cpp
index 502ca8da8c2..8a12833281c 100644
--- a/src/Disks/ObjectStorages/Web/WebObjectStorage.cpp
+++ b/src/Disks/ObjectStorages/Web/WebObjectStorage.cpp
@@ -28,7 +28,6 @@ namespace ErrorCodes
 {
     extern const int LOGICAL_ERROR;
     extern const int NOT_IMPLEMENTED;
-    extern const int NETWORK_ERROR;
 }
 
 void WebObjectStorage::initialize(const String & uri_path, const std::unique_lock<std::shared_mutex> & lock) const

From 1dbb86a045a3e505cf7e71de45e992cc1b288bd3 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Thu, 29 Jun 2023 13:38:46 +0200
Subject: [PATCH 088/242] Add _gen to ignore (generated by pytest)

Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com>
---
 .gitignore | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.gitignore b/.gitignore
index a04c60d5ca3..39d6f3f9fc8 100644
--- a/.gitignore
+++ b/.gitignore
@@ -161,6 +161,7 @@ tests/queries/0_stateless/test_*
 tests/queries/0_stateless/*.binary
 tests/queries/0_stateless/*.generated-expect
 tests/queries/0_stateless/*.expect.history
+tests/integration/**/_gen
 
 # rust
 /rust/**/target

From 6f6562c96bf5fb7157ff582815bae1330d34c50f Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Wed, 28 Jun 2023 10:23:40 +0200
Subject: [PATCH 089/242] Fix running integration tests with spaces in its
 names

Previously they were simply ignored, and not only them but all the
tests in that group; here is an example [1].

  [1]: https://s3.amazonaws.com/clickhouse-test-reports/51448/4ed462ac7834a8180f92ca7d7d3c076e687bfca9/integration_tests__asan__[4_6].html

Note that I've replaced PYTEST_OPTS with PYTEST_ADDOPTS, which is handled
by pytest itself, since "sh -c $PYTEST_OPTS" does not allow you to
preserve the arguments correctly.

v5: use double quotes for PYTEST_ADDOPTS value
Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com>
---
 docker/test/integration/runner/Dockerfile |  3 ++-
 tests/integration/ci-runner.py            |  3 ++-
 tests/integration/runner                  | 13 ++++++++++---
 3 files changed, 14 insertions(+), 5 deletions(-)

diff --git a/docker/test/integration/runner/Dockerfile b/docker/test/integration/runner/Dockerfile
index 0d1fa00b214..8e95d94b6dc 100644
--- a/docker/test/integration/runner/Dockerfile
+++ b/docker/test/integration/runner/Dockerfile
@@ -135,4 +135,5 @@ ENV MSAN_OPTIONS='abort_on_error=1 poison_in_dtor=1'
 
 EXPOSE 2375
 ENTRYPOINT ["dockerd-entrypoint.sh"]
-CMD ["sh", "-c", "pytest $PYTEST_OPTS"]
+# To pass additional arguments (i.e. list of tests) use PYTEST_ADDOPTS
+CMD ["sh", "-c", "pytest"]
diff --git a/tests/integration/ci-runner.py b/tests/integration/ci-runner.py
index 31f4a7666c8..98675cb0b34 100755
--- a/tests/integration/ci-runner.py
+++ b/tests/integration/ci-runner.py
@@ -10,6 +10,7 @@ import random
 import shutil
 import subprocess
 import time
+import shlex
 import zlib  # for crc32
 
 
@@ -646,7 +647,7 @@ class ClickhouseIntegrationTestsRunner:
             info_basename = test_group_str + "_" + str(i) + ".nfo"
             info_path = os.path.join(repo_path, "tests/integration", info_basename)
 
-            test_cmd = " ".join([test for test in sorted(test_names)])
+            test_cmd = " ".join([shlex.quote(test) for test in sorted(test_names)])
             parallel_cmd = (
                 " --parallel {} ".format(num_workers) if num_workers > 0 else ""
             )
diff --git a/tests/integration/runner b/tests/integration/runner
index c124ad46447..d524948dbcf 100755
--- a/tests/integration/runner
+++ b/tests/integration/runner
@@ -11,6 +11,7 @@ import subprocess
 import sys
 import string
 import random
+import shlex
 
 
 def random_str(length=6):
@@ -407,8 +408,14 @@ if __name__ == "__main__":
     if args.analyzer:
         use_analyzer = "-e CLICKHOUSE_USE_NEW_ANALYZER=1"
 
-    pytest_opts = " ".join(args.pytest_args).replace("'", "\\'")
-    tests_list = " ".join(args.tests_list)
+    # NOTE: since pytest options is in the argument value already we need to additionally escape '"'
+    pytest_opts = " ".join(
+        map(lambda x: shlex.quote(x).replace('"', '\\"'), args.pytest_args)
+    )
+    tests_list = " ".join(
+        map(lambda x: shlex.quote(x).replace('"', '\\"'), args.tests_list)
+    )
+
     cmd_base = (
         f"docker run {net} {tty} --rm --name {CONTAINER_NAME} "
         "--privileged --dns-search='.' "  # since recent dns search leaks from host
@@ -420,7 +427,7 @@ if __name__ == "__main__":
         f"--volume={args.src_dir}/Server/grpc_protos:/ClickHouse/src/Server/grpc_protos "
         f"--volume=/run:/run/host:ro {dockerd_internal_volume} {env_tags} {env_cleanup} "
         f"-e DOCKER_CLIENT_TIMEOUT=300 -e COMPOSE_HTTP_TIMEOUT=600 {use_analyzer} -e PYTHONUNBUFFERED=1 "
-        f"-e PYTEST_OPTS='{parallel_args} {pytest_opts} {tests_list} {rand_args} -vvv'"
+        f'-e PYTEST_ADDOPTS="{parallel_args} {pytest_opts} {tests_list} {rand_args} -vvv"'
         f" {DIND_INTEGRATION_TESTS_IMAGE_NAME}:{args.docker_image_version}"
     )
 

From e532d5b376ebadaa8fad5dd5c026e082bf851b7a Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Thu, 29 Jun 2023 13:48:15 +0200
Subject: [PATCH 090/242] tests: use -- over quotes on and on in ci-runner.py

Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com>
---
 tests/integration/ci-runner.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/integration/ci-runner.py b/tests/integration/ci-runner.py
index 98675cb0b34..4f40058b3d9 100755
--- a/tests/integration/ci-runner.py
+++ b/tests/integration/ci-runner.py
@@ -412,7 +412,7 @@ class ClickhouseIntegrationTestsRunner:
         out_file_full = os.path.join(self.result_path, "runner_get_all_tests.log")
         cmd = (
             "cd {repo_path}/tests/integration && "
-            "timeout -s 9 1h ./runner {runner_opts} {image_cmd} ' --setup-plan' "
+            "timeout -s 9 1h ./runner {runner_opts} {image_cmd} -- --setup-plan "
             "| tee {out_file_full} | grep '::' | sed 's/ (fixtures used:.*//g' | sed 's/^ *//g' | sed 's/ *$//g' "
             "| grep -v 'SKIPPED' | sort -u  > {out_file}".format(
                 repo_path=repo_path,
@@ -656,7 +656,7 @@ class ClickhouseIntegrationTestsRunner:
             # -E -- (E)rror
             # -p -- (p)assed
             # -s -- (s)kipped
-            cmd = "cd {}/tests/integration && timeout -s 9 1h ./runner {} {} -t {} {} '-rfEps --run-id={} --color=no --durations=0 {}' | tee {}".format(
+            cmd = "cd {}/tests/integration && timeout -s 9 1h ./runner {} {} -t {} {} -- -rfEps --run-id={} --color=no --durations=0 {} | tee {}".format(
                 repo_path,
                 self._get_runner_opts(),
                 image_cmd,

From acbdbf6ab744ec4659c7ed3ac613c1487a5ab021 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Thu, 29 Jun 2023 13:49:03 +0200
Subject: [PATCH 091/242] tests: do not use --format if --quiet is set in
 integration runner

Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com>
---
 tests/integration/runner | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/tests/integration/runner b/tests/integration/runner
index d524948dbcf..1b902803741 100755
--- a/tests/integration/runner
+++ b/tests/integration/runner
@@ -136,9 +136,7 @@ def check_args_and_update_paths(args):
 
 def docker_kill_handler_handler(signum, frame):
     subprocess.check_call(
-        'docker ps --all --quiet --filter name={name} --format="{{{{.ID}}}}"'.format(
-            name=CONTAINER_NAME
-        ),
+        "docker ps --all --quiet --filter name={name}".format(name=CONTAINER_NAME),
         shell=True,
     )
     raise KeyboardInterrupt("Killed by Ctrl+C")
@@ -438,7 +436,7 @@ if __name__ == "__main__":
     )
 
     containers = subprocess.check_output(
-        f"docker ps --all --quiet --filter name={CONTAINER_NAME} --format={{{{.ID}}}}",
+        f"docker ps --all --quiet --filter name={CONTAINER_NAME}",
         shell=True,
         universal_newlines=True,
     ).splitlines()

From 6c986c1fb22e2a5fff2bc281d55eb545868a4645 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Wed, 12 Jul 2023 15:21:28 +0200
Subject: [PATCH 092/242] Allow test names with spaces in them

It had been fixed and now it is allowed.

Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com>
---
 tests/integration/ci-runner.py | 41 +++++++++++++++++++++-------------
 1 file changed, 26 insertions(+), 15 deletions(-)

diff --git a/tests/integration/ci-runner.py b/tests/integration/ci-runner.py
index 4f40058b3d9..f2adea78da7 100755
--- a/tests/integration/ci-runner.py
+++ b/tests/integration/ci-runner.py
@@ -7,6 +7,7 @@ import json
 import logging
 import os
 import random
+import re
 import shutil
 import subprocess
 import time
@@ -111,16 +112,36 @@ def get_counters(fname):
             if not (".py::" in line and " " in line):
                 continue
 
-            line_arr = line.strip().split(" ")
+            line = line.strip()
+            # [gw0] [  7%] ERROR test_mysql_protocol/test.py::test_golang_client
+            # ^^^^^^^^^^^^^
+            if line.strip().startswith("["):
+                line = re.sub("^\[[^\[\]]*\] \[[^\[\]]*\] ", "", line)
+
+            line_arr = line.split(" ")
             if len(line_arr) < 2:
                 logging.debug("Strange line %s", line)
                 continue
 
             # Lines like:
-            #     [gw0] [  7%] ERROR test_mysql_protocol/test.py::test_golang_client
-            #     [gw3] [ 40%] PASSED test_replicated_users/test.py::test_rename_replicated[QUOTA]
-            state = line_arr[-2]
-            test_name = line_arr[-1]
+            #
+            #     ERROR test_mysql_protocol/test.py::test_golang_client
+            #     PASSED test_replicated_users/test.py::test_rename_replicated[QUOTA]
+            #     PASSED test_drop_is_lock_free/test.py::test_query_is_lock_free[detach part]
+            #
+            state = line_arr.pop(0)
+            test_name = " ".join(line_arr)
+
+            # Normalize test names for lines like this:
+            #
+            #    FAILED test_storage_s3/test.py::test_url_reconnect_in_the_middle - Exception
+            #    FAILED test_distributed_ddl/test.py::test_default_database[configs] - AssertionError: assert ...
+            #
+            test_name = re.sub(
+                r"^(?P<test_name>[^\[\] ]+)(?P<test_param>\[[^\[\]]*\]|)(?P<test_error> - .*|)$",
+                r"\g<test_name>\g<test_param>",
+                test_name,
+            )
 
             if state in counters:
                 counters[state].add(test_name)
@@ -1000,16 +1021,6 @@ class ClickhouseIntegrationTestsRunner:
         if "(memory)" in self.params["context_name"]:
             result_state = "success"
 
-        for res in test_result:
-            # It's not easy to parse output of pytest
-            # Especially when test names may contain spaces
-            # Do not allow it to avoid obscure failures
-            if " " not in res[0]:
-                continue
-            logging.warning("Found invalid test name with space: %s", res[0])
-            status_text = "Found test with invalid name, see main log"
-            result_state = "failure"
-
         return result_state, status_text, test_result, []
 
 
From ee5e639ce9ea4ed07f9fb309c9dbc6c1ad2193a7 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Mon, 17 Jul 2023 17:29:47 +0200
Subject: [PATCH 093/242] Update analyzer integration broken tests list

After proper parsing more failed tests pop up:
- https://s3.amazonaws.com/clickhouse-test-reports/51514/aa90e11241a97e0fd3f36c5ea0b57db50e299d2d/integration_tests__asan__analyzer__[4_6].html
- https://s3.amazonaws.com/clickhouse-test-reports/51514/aa90e11241a97e0fd3f36c5ea0b57db50e299d2d/integration_tests__asan__analyzer__[5_6].html

Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com>
---
 tests/analyzer_integration_broken_tests.txt | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/tests/analyzer_integration_broken_tests.txt b/tests/analyzer_integration_broken_tests.txt
index 46b9ade43de..02f70c8a6df 100644
--- a/tests/analyzer_integration_broken_tests.txt
+++ b/tests/analyzer_integration_broken_tests.txt
@@ -196,3 +196,8 @@ test_quota/test.py::test_tracking_quota
 test_quota/test.py::test_users_xml_is_readonly
 test_replicating_constants/test.py::test_different_versions
 test_merge_tree_s3/test.py::test_heavy_insert_select_check_memory[node]
+test_drop_is_lock_free/test.py::test_query_is_lock_free[detach table]
+test_backward_compatibility/test_data_skipping_indices.py::test_index
+test_backward_compatibility/test_convert_ordinary.py::test_convert_ordinary_to_atomic
+test_backward_compatibility/test_memory_bound_aggregation.py::test_backward_compatability
+test_odbc_interaction/test.py::test_postgres_insert

From 76abbce91829c8ec47b952d5d26a0d9df8ce2f85 Mon Sep 17 00:00:00 2001
From: Dmitry Kardymon <d.kardymon@arenadata.io>
Date: Mon, 17 Jul 2023 16:33:04 +0000
Subject: [PATCH 094/242] Trying to add methods to queue

---
 src/Common/SystemLogBase.cpp   | 102 ++++++++++++++++++++++-----------
 src/Common/SystemLogBase.h     |  28 ++++-----
 src/Interpreters/SystemLog.cpp |  28 ++-------
 src/Interpreters/SystemLog.h   |   4 +-
 4 files changed, 85 insertions(+), 77 deletions(-)

diff --git a/src/Common/SystemLogBase.cpp b/src/Common/SystemLogBase.cpp
index 7373786e514..c979613b146 100644
--- a/src/Common/SystemLogBase.cpp
+++ b/src/Common/SystemLogBase.cpp
@@ -42,11 +42,8 @@ template <typename LogElement>
 SystemLogBase<LogElement>::SystemLogBase(
     const String & name_,
     std::shared_ptr<SystemLogQueue<LogElement>> queue_)
+    : queue(queue_ ? queue_ : std::make_shared<SystemLogQueue<LogElement>>(name_))
 {
-    if (queue_)
-        queue = queue_;
-    else
-        queue = std::make_shared<SystemLogQueue<LogElement>>(name_);
 }
 
 template <typename LogElement>
@@ -153,27 +150,40 @@ void SystemLogQueue<LogElement>::add(const LogElement & element)
 }
 
 template <typename LogElement>
-void SystemLogBase<LogElement>::add(const LogElement & element)
+uint64_t SystemLogQueue<LogElement>::notifyFlush(bool force)
 {
-    queue->add(element);
+    uint64_t this_thread_requested_offset;
+
+    {
+        std::lock_guard lock(mutex);
+        if (is_shutdown)
+            return uint64_t(-1);
+
+        this_thread_requested_offset = queue_front_index + queue.size();
+
+        // Publish our flush request, taking care not to overwrite the requests
+        // made by other threads.
+        is_force_prepare_tables |= force;
+        requested_flush_up_to = std::max(requested_flush_up_to, this_thread_requested_offset);
+
+        flush_event.notify_all();
+    }
+
+    LOG_DEBUG(log, "Requested flush up to offset {}", this_thread_requested_offset);
+    return this_thread_requested_offset;
 }
 
 template <typename LogElement>
-void SystemLogBase<LogElement>::flush(bool force)
+void SystemLogQueue<LogElement>::waitFlush(uint64_t this_thread_requested_offset_)
 {
-    uint64_t this_thread_requested_offset = notifyFlushImpl(force);
-    if (this_thread_requested_offset == uint64_t(-1))
-        return;
-
-
     // Use an arbitrary timeout to avoid endless waiting. 60s proved to be
     // too fast for our parallel functional tests, probably because they
     // heavily load the disk.
     const int timeout_seconds = 180;
-    std::unique_lock lock(queue->mutex);
-    bool result = queue->flush_event.wait_for(lock, std::chrono::seconds(timeout_seconds), [&]
+    std::unique_lock lock(mutex);
+    bool result = flush_event.wait_for(lock, std::chrono::seconds(timeout_seconds), [&]
     {
-        return flushed_up_to >= this_thread_requested_offset && !is_force_prepare_tables;
+        return flushed_up_to >= this_thread_requested_offset_ && !is_force_prepare_tables;
     });
 
     if (!result)
@@ -183,33 +193,57 @@ void SystemLogBase<LogElement>::flush(bool force)
     }
 }
 
-template <typename LogElement>
-void SystemLogBase<LogElement>::notifyFlush(bool force) { notifyFlushImpl(force); }
+constexpr size_t DEFAULT_SYSTEM_LOG_FLUSH_INTERVAL_MILLISECONDS = 7500;
 
 template <typename LogElement>
-uint64_t SystemLogBase<LogElement>::notifyFlushImpl(bool force)
+void SystemLogQueue<LogElement>::pop(std::vector<LogElement>& output, uint64_t& to_flush_end, bool& should_prepare_tables_anyway, bool& exit_this_thread)
 {
-    uint64_t this_thread_requested_offset;
+    std::unique_lock lock(queue->mutex);
+    flush_event.wait_for(lock,
+        std::chrono::milliseconds(flush_interval_milliseconds),
+        [&] ()
+        {
+            return requested_flush_up_to > flushed_up_to || is_shutdown || is_force_prepare_tables;
+        }
+    );
 
-    {
-        std::lock_guard lock(queue->mutex);
-        if (is_shutdown)
-            return uint64_t(-1);
+    queue_front_index += queue->size();
+    to_flush_end = queue->queue_front_index;
+    // Swap with existing array from previous flush, to save memory
+    // allocations.
+    output.resize(0);
+    queue.swap(to_flush);
 
-        this_thread_requested_offset = queue->queue_front_index + queue->queue.size();
+    should_prepare_tables_anyway = is_force_prepare_tables;
 
-        // Publish our flush request, taking care not to overwrite the requests
-        // made by other threads.
-        is_force_prepare_tables |= force;
-        queue->requested_flush_up_to = std::max(queue->requested_flush_up_to, this_thread_requested_offset);
-
-        queue->flush_event.notify_all();
-    }
-
-    LOG_DEBUG(log, "Requested flush up to offset {}", this_thread_requested_offset);
-    return this_thread_requested_offset;
+    exit_this_thread = is_shutdown;
 }
 
+template <typename LogElement>
+void SystemLogBase<LogElement>::add(const LogElement & element)
+{
+    queue->add(element);
+}
+
+template <typename LogElement>
+void SystemLogBase<LogElement>::flush(bool force)
+{
+    uint64_t this_thread_requested_offset = queue->notifyFlush(force);
+    if (this_thread_requested_offset == uint64_t(-1))
+        return;
+
+    queue->waitFlush(this_thread_requested_offset);
+}
+
+template <typename LogElement>
+void SystemLogBase<LogElement>::notifyFlush(bool force) { queue->notifyFlush(force); }
+
+// template <typename LogElement>
+// uint64_t SystemLogBase<LogElement>::notifyFlushImpl(bool force)
+// {
+//     return queue->notifyFlush(force);
+// }
+
 #define INSTANTIATE_SYSTEM_LOG_BASE(ELEMENT) template class SystemLogBase<ELEMENT>;
 SYSTEM_LOG_ELEMENTS(INSTANTIATE_SYSTEM_LOG_BASE)
 
diff --git a/src/Common/SystemLogBase.h b/src/Common/SystemLogBase.h
index 109334964d2..0c1f32672a9 100644
--- a/src/Common/SystemLogBase.h
+++ b/src/Common/SystemLogBase.h
@@ -81,6 +81,10 @@ public:
     //void push_back(const LogElement & element) { queue.push_back(element); }
     void shutdown() { is_shutdown = true; }
 
+    uint64_t notifyFlush(bool force);
+    void waitFlush(uint64_t this_thread_requested_offset_);
+    void pop(std::vector<LogElement>& output, uint64_t& to_flush_end, bool& should_prepare_tables_anyway, bool& exit_this_thread)
+
     // Queue is bounded. But its size is quite large to not block in all normal cases.
     std::vector<LogElement> queue;
     // An always-incrementing index of the first message currently in the queue.
@@ -96,12 +100,16 @@ public:
     // Requested to flush logs up to this index, exclusive
     uint64_t requested_flush_up_to = 0;
 
-    // Logged overflow message at this queue front index
-    uint64_t logged_queue_full_at_index = -1;
-
+    // A flag that says we must create the tables even if the queue is empty.
+    bool is_force_prepare_tables = false;
+    
+    // Flushed log up to this index, exclusive
+    uint64_t flushed_up_to = 0;
 private:
     Poco::Logger * log;
     bool is_shutdown = false;
+    // Logged overflow message at this queue front index
+    uint64_t logged_queue_full_at_index = -1;
 };
 
 template <typename LogElement>
@@ -138,21 +146,7 @@ protected:
 
     std::shared_ptr<SystemLogQueue<LogElement>> queue;
 
-    // A flag that says we must create the tables even if the queue is empty.
-    bool is_force_prepare_tables = false;
-
-    // Flushed log up to this index, exclusive
-    uint64_t flushed_up_to = 0;
-
     bool is_shutdown = false;
-
-    // Logged overflow message at this queue front index
-    uint64_t logged_queue_full_at_index = -1;
-
-private:
-    uint64_t notifyFlushImpl(bool force);
-
-
 };
 
 }
diff --git a/src/Interpreters/SystemLog.cpp b/src/Interpreters/SystemLog.cpp
index b77cb2311d5..983c9f87c45 100644
--- a/src/Interpreters/SystemLog.cpp
+++ b/src/Interpreters/SystemLog.cpp
@@ -372,27 +372,7 @@ void SystemLog<LogElement>::savingThreadFunction()
             // Should we prepare table even if there are no new messages.
             bool should_prepare_tables_anyway = false;
 
-            {
-                std::unique_lock lock(queue->mutex);
-                queue->flush_event.wait_for(lock,
-                    std::chrono::milliseconds(flush_interval_milliseconds),
-                    [&] ()
-                    {
-                        return queue->requested_flush_up_to > flushed_up_to || is_shutdown || is_force_prepare_tables;
-                    }
-                );
-
-                queue->queue_front_index += queue->size();
-                to_flush_end = queue->queue_front_index;
-                // Swap with existing array from previous flush, to save memory
-                // allocations.
-                to_flush.resize(0);
-                queue->queue.swap(to_flush);
-
-                should_prepare_tables_anyway = is_force_prepare_tables;
-
-                exit_this_thread = is_shutdown;
-            }
+            queue->pop(to_flush, to_flush_end, should_prepare_tables_anyway, exit_this_thread);
 
             if (to_flush.empty())
             {
@@ -402,7 +382,7 @@ void SystemLog<LogElement>::savingThreadFunction()
                     LOG_TRACE(log, "Table created (force)");
 
                     std::lock_guard lock(queue->mutex);
-                    is_force_prepare_tables = false;
+                     queue->is_force_prepare_tables = false;
                     queue->flush_event.notify_all();
                 }
             }
@@ -477,8 +457,8 @@ void SystemLog<LogElement>::flushImpl(const std::vector<LogElement> & to_flush,
 
     {
         std::lock_guard lock(queue->mutex);
-        flushed_up_to = to_flush_end;
-        is_force_prepare_tables = false;
+        queue->flushed_up_to = to_flush_end;
+        queue->is_force_prepare_tables = false;
         queue->flush_event.notify_all();
     }
 
diff --git a/src/Interpreters/SystemLog.h b/src/Interpreters/SystemLog.h
index 3f5ad8a5b6c..c730aa23319 100644
--- a/src/Interpreters/SystemLog.h
+++ b/src/Interpreters/SystemLog.h
@@ -121,9 +121,9 @@ protected:
     using Base::log;
     using Base::queue;
    // using Base::queue_front_index;
-    using Base::is_force_prepare_tables;
+ //   using Base::is_force_prepare_tables;
     //using Base::requested_flush_up_to;
-    using Base::flushed_up_to;
+  //  using Base::flushed_up_to;
  //   using Base::logged_queue_full_at_index;
 
 private:

From 0902ee1ec7c191cce834467384ab8b4ce4db3ac5 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@clickhouse.com>
Date: Tue, 18 Jul 2023 01:36:44 +0200
Subject: [PATCH 095/242] Remove useless header

---
 src/Common/DateLUTImpl.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/Common/DateLUTImpl.cpp b/src/Common/DateLUTImpl.cpp
index d7bdd0bb3d9..8146b35cc5f 100644
--- a/src/Common/DateLUTImpl.cpp
+++ b/src/Common/DateLUTImpl.cpp
@@ -10,7 +10,6 @@
 #include <cassert>
 #include <chrono>
 #include <cstring>
-#include <iostream>
 #include <memory>
 
 
From b5f19371339ae1b98efae9421c9d6d9bd94a75fd Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@clickhouse.com>
Date: Tue, 18 Jul 2023 01:48:12 +0200
Subject: [PATCH 096/242] Update more tests

---
 .../0_stateless/00753_system_columns_and_system_tables_long.sql | 2 +-
 .../0_stateless/02241_filesystem_cache_on_write_operations.sh   | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/queries/0_stateless/00753_system_columns_and_system_tables_long.sql b/tests/queries/0_stateless/00753_system_columns_and_system_tables_long.sql
index f4c4110cd5b..16085c8a995 100644
--- a/tests/queries/0_stateless/00753_system_columns_and_system_tables_long.sql
+++ b/tests/queries/0_stateless/00753_system_columns_and_system_tables_long.sql
@@ -12,7 +12,7 @@ CREATE TABLE check_system_tables
     ORDER BY name1
     PARTITION BY name2
     SAMPLE BY name1
-    SETTINGS min_bytes_for_wide_part = 0, compress_marks=false, compress_primary_key=false;
+    SETTINGS min_bytes_for_wide_part = 0, compress_marks = false, compress_primary_key = false, ratio_of_defaults_for_sparse_serialization = 1;
 
 SELECT name, partition_key, sorting_key, primary_key, sampling_key, storage_policy, total_rows
 FROM system.tables WHERE name = 'check_system_tables' AND database = currentDatabase()
diff --git a/tests/queries/0_stateless/02241_filesystem_cache_on_write_operations.sh b/tests/queries/0_stateless/02241_filesystem_cache_on_write_operations.sh
index e65bf9cb35f..3a22ddfd7f5 100755
--- a/tests/queries/0_stateless/02241_filesystem_cache_on_write_operations.sh
+++ b/tests/queries/0_stateless/02241_filesystem_cache_on_write_operations.sh
@@ -11,7 +11,7 @@ for STORAGE_POLICY in 's3_cache' 'local_cache'; do
     echo "Using storage policy: $STORAGE_POLICY"
 
     $CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS test_02241"
-    $CLICKHOUSE_CLIENT --query "CREATE TABLE test_02241 (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='$STORAGE_POLICY', min_bytes_for_wide_part = 10485760, compress_marks=false, compress_primary_key=false"
+    $CLICKHOUSE_CLIENT --query "CREATE TABLE test_02241 (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='$STORAGE_POLICY', min_bytes_for_wide_part = 10485760, compress_marks=false, compress_primary_key=false, ratio_of_defaults_for_sparse_serialization = 1"
     $CLICKHOUSE_CLIENT --query "SYSTEM STOP MERGES test_02241"
 
     $CLICKHOUSE_CLIENT --query "SYSTEM DROP FILESYSTEM CACHE"

From a4b92652da148bef3cf13d0182378ba8ca366c24 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@clickhouse.com>
Date: Tue, 18 Jul 2023 01:50:43 +0200
Subject: [PATCH 097/242] Update more tests

---
 tests/integration/test_multiple_disks/test.py |  2 +-
 tests/integration/test_partition/test.py      | 20 +++++++++----------
 .../test_polymorphic_parts/test.py            |  2 +-
 3 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/tests/integration/test_multiple_disks/test.py b/tests/integration/test_multiple_disks/test.py
index bc7ac6683af..0724791c940 100644
--- a/tests/integration/test_multiple_disks/test.py
+++ b/tests/integration/test_multiple_disks/test.py
@@ -1718,7 +1718,7 @@ def test_freeze(start_cluster):
             ) ENGINE = MergeTree
             ORDER BY tuple()
             PARTITION BY toYYYYMM(d)
-            SETTINGS storage_policy='small_jbod_with_external', compress_marks=false, compress_primary_key=false
+            SETTINGS storage_policy='small_jbod_with_external', compress_marks=false, compress_primary_key=false, ratio_of_defaults_for_sparse_serialization=1
         """
         )
 
diff --git a/tests/integration/test_partition/test.py b/tests/integration/test_partition/test.py
index 93f03f4420e..2517b2d1ae6 100644
--- a/tests/integration/test_partition/test.py
+++ b/tests/integration/test_partition/test.py
@@ -38,7 +38,7 @@ def partition_table_simple(started_cluster):
     q(
         "CREATE TABLE test.partition_simple (date MATERIALIZED toDate(0), x UInt64, sample_key MATERIALIZED intHash64(x)) "
         "ENGINE=MergeTree PARTITION BY date SAMPLE BY sample_key ORDER BY (date,x,sample_key) "
-        "SETTINGS index_granularity=8192, index_granularity_bytes=0, compress_marks=false, compress_primary_key=false"
+        "SETTINGS index_granularity=8192, index_granularity_bytes=0, compress_marks=false, compress_primary_key=false, ratio_of_defaults_for_sparse_serialization=1"
     )
     q("INSERT INTO test.partition_simple ( x ) VALUES ( now() )")
     q("INSERT INTO test.partition_simple ( x ) VALUES ( now()+1 )")
@@ -150,7 +150,7 @@ def partition_table_complex(started_cluster):
     q("DROP TABLE IF EXISTS test.partition_complex")
     q(
         "CREATE TABLE test.partition_complex (p Date, k Int8, v1 Int8 MATERIALIZED k + 1) "
-        "ENGINE = MergeTree PARTITION BY p ORDER BY k SETTINGS index_granularity=1, index_granularity_bytes=0, compress_marks=false, compress_primary_key=false"
+        "ENGINE = MergeTree PARTITION BY p ORDER BY k SETTINGS index_granularity=1, index_granularity_bytes=0, compress_marks=false, compress_primary_key=false, ratio_of_defaults_for_sparse_serialization=1"
     )
     q("INSERT INTO test.partition_complex (p, k) VALUES(toDate(31), 1)")
     q("INSERT INTO test.partition_complex (p, k) VALUES(toDate(1), 2)")
@@ -188,7 +188,7 @@ def test_partition_complex(partition_table_complex):
 def cannot_attach_active_part_table(started_cluster):
     q("DROP TABLE IF EXISTS test.attach_active")
     q(
-        "CREATE TABLE test.attach_active (n UInt64) ENGINE = MergeTree() PARTITION BY intDiv(n, 4) ORDER BY n SETTINGS compress_marks=false, compress_primary_key=false"
+        "CREATE TABLE test.attach_active (n UInt64) ENGINE = MergeTree() PARTITION BY intDiv(n, 4) ORDER BY n SETTINGS compress_marks=false, compress_primary_key=false, ratio_of_defaults_for_sparse_serialization=1"
     )
     q("INSERT INTO test.attach_active SELECT number FROM system.numbers LIMIT 16")
 
@@ -217,7 +217,7 @@ def attach_check_all_parts_table(started_cluster):
     q("DROP TABLE IF EXISTS test.attach_partition")
     q(
         "CREATE TABLE test.attach_partition (n UInt64) ENGINE = MergeTree() PARTITION BY intDiv(n, 8) ORDER BY n "
-        "SETTINGS compress_marks=false, compress_primary_key=false, old_parts_lifetime=0"
+        "SETTINGS compress_marks=false, compress_primary_key=false, ratio_of_defaults_for_sparse_serialization=1, old_parts_lifetime=0"
     )
     q(
         "INSERT INTO test.attach_partition SELECT number FROM system.numbers WHERE number % 2 = 0 LIMIT 8"
@@ -299,7 +299,7 @@ def drop_detached_parts_table(started_cluster):
     q("SYSTEM STOP MERGES")
     q("DROP TABLE IF EXISTS test.drop_detached")
     q(
-        "CREATE TABLE test.drop_detached (n UInt64) ENGINE = MergeTree() PARTITION BY intDiv(n, 8) ORDER BY n SETTINGS compress_marks=false, compress_primary_key=false"
+        "CREATE TABLE test.drop_detached (n UInt64) ENGINE = MergeTree() PARTITION BY intDiv(n, 8) ORDER BY n SETTINGS compress_marks=false, compress_primary_key=false, ratio_of_defaults_for_sparse_serialization=1"
     )
     q(
         "INSERT INTO test.drop_detached SELECT number FROM system.numbers WHERE number % 2 = 0 LIMIT 8"
@@ -370,13 +370,13 @@ def test_drop_detached_parts(drop_detached_parts_table):
 
 def test_system_detached_parts(drop_detached_parts_table):
     q(
-        "create table sdp_0 (n int, x int) engine=MergeTree order by n SETTINGS compress_marks=false, compress_primary_key=false"
+        "create table sdp_0 (n int, x int) engine=MergeTree order by n SETTINGS compress_marks=false, compress_primary_key=false, ratio_of_defaults_for_sparse_serialization=1"
     )
     q(
-        "create table sdp_1 (n int, x int) engine=MergeTree order by n partition by x SETTINGS compress_marks=false, compress_primary_key=false"
+        "create table sdp_1 (n int, x int) engine=MergeTree order by n partition by x SETTINGS compress_marks=false, compress_primary_key=false, ratio_of_defaults_for_sparse_serialization=1"
     )
     q(
-        "create table sdp_2 (n int, x String) engine=MergeTree order by n partition by x SETTINGS compress_marks=false, compress_primary_key=false"
+        "create table sdp_2 (n int, x String) engine=MergeTree order by n partition by x SETTINGS compress_marks=false, compress_primary_key=false, ratio_of_defaults_for_sparse_serialization=1"
     )
     q(
         "create table sdp_3 (n int, x Enum('broken' = 0, 'all' = 1)) engine=MergeTree order by n partition by x"
@@ -497,7 +497,7 @@ def test_system_detached_parts(drop_detached_parts_table):
 def test_detached_part_dir_exists(started_cluster):
     q(
         "create table detached_part_dir_exists (n int) engine=MergeTree order by n "
-        "SETTINGS compress_marks=false, compress_primary_key=false, old_parts_lifetime=0"
+        "SETTINGS compress_marks=false, compress_primary_key=false, ratio_of_defaults_for_sparse_serialization=1, old_parts_lifetime=0"
     )
     q("insert into detached_part_dir_exists select 1")  # will create all_1_1_0
     q(
@@ -549,7 +549,7 @@ def test_detached_part_dir_exists(started_cluster):
 
 def test_make_clone_in_detached(started_cluster):
     q(
-        "create table clone_in_detached (n int, m String) engine=ReplicatedMergeTree('/clone_in_detached', '1') order by n SETTINGS compress_marks=false, compress_primary_key=false"
+        "create table clone_in_detached (n int, m String) engine=ReplicatedMergeTree('/clone_in_detached', '1') order by n SETTINGS compress_marks=false, compress_primary_key=false, ratio_of_defaults_for_sparse_serialization=1"
     )
 
     path = path_to_data + "data/default/clone_in_detached/"
diff --git a/tests/integration/test_polymorphic_parts/test.py b/tests/integration/test_polymorphic_parts/test.py
index fb1f363b825..c5859146fe9 100644
--- a/tests/integration/test_polymorphic_parts/test.py
+++ b/tests/integration/test_polymorphic_parts/test.py
@@ -498,7 +498,7 @@ def test_polymorphic_parts_index(start_cluster):
         """
         CREATE TABLE test_index.index_compact(a UInt32, s String)
         ENGINE = MergeTree ORDER BY a
-        SETTINGS min_rows_for_wide_part = 1000, index_granularity = 128, merge_max_block_size = 100, compress_marks=false, compress_primary_key=false"""
+        SETTINGS min_rows_for_wide_part = 1000, index_granularity = 128, merge_max_block_size = 100, compress_marks=false, compress_primary_key=false, ratio_of_defaults_for_sparse_serialization=1"""
     )
 
     node1.query(

From 806caea2821480f04cb26a07842ce06ed3d0f319 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@clickhouse.com>
Date: Tue, 18 Jul 2023 01:56:38 +0200
Subject: [PATCH 098/242] Update more tests

---
 .../test_merge_tree_s3/configs/config.d/storage_conf.xml         | 1 +
 .../configs/config.d/storage_conf.xml                            | 1 +
 .../configs/config.d/storage_conf.xml                            | 1 +
 tests/integration/test_s3_zero_copy_ttl/configs/s3.xml           | 1 +
 4 files changed, 4 insertions(+)

diff --git a/tests/integration/test_merge_tree_s3/configs/config.d/storage_conf.xml b/tests/integration/test_merge_tree_s3/configs/config.d/storage_conf.xml
index 504280e4bed..4f0e2db9b08 100644
--- a/tests/integration/test_merge_tree_s3/configs/config.d/storage_conf.xml
+++ b/tests/integration/test_merge_tree_s3/configs/config.d/storage_conf.xml
@@ -152,6 +152,7 @@
 
     <merge_tree>
         <min_bytes_for_wide_part>0</min_bytes_for_wide_part>
+        <ratio_of_defaults_for_sparse_serialization>1.0</ratio_of_defaults_for_sparse_serialization>
     </merge_tree>
 
     <database_catalog_unused_dir_hide_timeout_sec>0</database_catalog_unused_dir_hide_timeout_sec>
diff --git a/tests/integration/test_replicated_merge_tree_s3/configs/config.d/storage_conf.xml b/tests/integration/test_replicated_merge_tree_s3/configs/config.d/storage_conf.xml
index 00aa03b1a92..829bf16fdfb 100644
--- a/tests/integration/test_replicated_merge_tree_s3/configs/config.d/storage_conf.xml
+++ b/tests/integration/test_replicated_merge_tree_s3/configs/config.d/storage_conf.xml
@@ -35,6 +35,7 @@
     <merge_tree>
         <min_bytes_for_wide_part>0</min_bytes_for_wide_part>
         <allow_remote_fs_zero_copy_replication>0</allow_remote_fs_zero_copy_replication>
+        <ratio_of_defaults_for_sparse_serialization>1.0</ratio_of_defaults_for_sparse_serialization>
     </merge_tree>
 
     <remote_servers>
diff --git a/tests/integration/test_replicated_merge_tree_s3_zero_copy/configs/config.d/storage_conf.xml b/tests/integration/test_replicated_merge_tree_s3_zero_copy/configs/config.d/storage_conf.xml
index 96d59d5633e..f78256bdb26 100644
--- a/tests/integration/test_replicated_merge_tree_s3_zero_copy/configs/config.d/storage_conf.xml
+++ b/tests/integration/test_replicated_merge_tree_s3_zero_copy/configs/config.d/storage_conf.xml
@@ -29,6 +29,7 @@
     <merge_tree>
         <min_bytes_for_wide_part>0</min_bytes_for_wide_part>
         <allow_remote_fs_zero_copy_replication>true</allow_remote_fs_zero_copy_replication>
+        <ratio_of_defaults_for_sparse_serialization>1.0</ratio_of_defaults_for_sparse_serialization>
     </merge_tree>
 
     <remote_servers>
diff --git a/tests/integration/test_s3_zero_copy_ttl/configs/s3.xml b/tests/integration/test_s3_zero_copy_ttl/configs/s3.xml
index e179c848be1..7bb7fa875e4 100644
--- a/tests/integration/test_s3_zero_copy_ttl/configs/s3.xml
+++ b/tests/integration/test_s3_zero_copy_ttl/configs/s3.xml
@@ -32,6 +32,7 @@
 
   <merge_tree>
     <allow_remote_fs_zero_copy_replication>true</allow_remote_fs_zero_copy_replication>
+    <ratio_of_defaults_for_sparse_serialization>1.0</ratio_of_defaults_for_sparse_serialization>
   </merge_tree>
 
   <allow_remove_stale_moving_parts>true</allow_remove_stale_moving_parts>

From f3dcf4db3a30f552932aa08b72116935de9d519d Mon Sep 17 00:00:00 2001
From: Duc Canh Le <duccanh.le@ahrefs.com>
Date: Tue, 18 Jul 2023 11:00:39 +0800
Subject: [PATCH 099/242] update 02815_range_dict_no_direct_join.sql

---
 tests/queries/0_stateless/02815_range_dict_no_direct_join.sql | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/queries/0_stateless/02815_range_dict_no_direct_join.sql b/tests/queries/0_stateless/02815_range_dict_no_direct_join.sql
index 249ffdf8089..f4e5ccccf78 100644
--- a/tests/queries/0_stateless/02815_range_dict_no_direct_join.sql
+++ b/tests/queries/0_stateless/02815_range_dict_no_direct_join.sql
@@ -30,4 +30,4 @@ RANGE(MIN discount_start_date MAX discount_end_date);
 CREATE TABLE ids (id UInt64) ENGINE = Memory;
 INSERT INTO ids SELECT * FROM numbers(10);
 
-SELECT id, amount FROM ids INNER JOIN discounts_dict ON id = advertiser_id SETTINGS join_algorithm = 'direct';
\ No newline at end of file
+SELECT id, amount FROM ids INNER JOIN discounts_dict ON id = advertiser_id ORDER BY id, amount SETTINGS join_algorithm = 'direct';

From 9b7df9e12e9e85e1a2ecb2788c56dff74b75b309 Mon Sep 17 00:00:00 2001
From: Duc Canh Le <duccanh.le@ahrefs.com>
Date: Tue, 18 Jul 2023 05:51:55 +0000
Subject: [PATCH 100/242] fix for analyzer

---
 src/Interpreters/TableJoin.h                                | 4 ++--
 src/Planner/PlannerJoins.cpp                                | 3 ++-
 .../0_stateless/02815_range_dict_no_direct_join.reference   | 6 ++++++
 .../queries/0_stateless/02815_range_dict_no_direct_join.sql | 1 +
 4 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/src/Interpreters/TableJoin.h b/src/Interpreters/TableJoin.h
index ba3befab59b..5d14a57759f 100644
--- a/src/Interpreters/TableJoin.h
+++ b/src/Interpreters/TableJoin.h
@@ -223,10 +223,10 @@ public:
     {
         /// When join_algorithm = 'default' (not specified by user) we use hash or direct algorithm.
         /// It's behaviour that was initially supported by clickhouse.
-        bool is_enbaled_by_default = val == JoinAlgorithm::DEFAULT
+        bool is_enabled_by_default = val == JoinAlgorithm::DEFAULT
                                   || val == JoinAlgorithm::HASH
                                   || val == JoinAlgorithm::DIRECT;
-        if (join_algorithm.isSet(JoinAlgorithm::DEFAULT) && is_enbaled_by_default)
+        if (join_algorithm.isSet(JoinAlgorithm::DEFAULT) && is_enabled_by_default)
             return true;
         return join_algorithm.isSet(val);
     }
diff --git a/src/Planner/PlannerJoins.cpp b/src/Planner/PlannerJoins.cpp
index 7da10a8523b..e495b0967e9 100644
--- a/src/Planner/PlannerJoins.cpp
+++ b/src/Planner/PlannerJoins.cpp
@@ -542,7 +542,8 @@ void trySetStorageInTableJoin(const QueryTreeNodePtr & table_expression, std::sh
     if (!table_join->isEnabledAlgorithm(JoinAlgorithm::DIRECT))
         return;
 
-    if (auto storage_dictionary = std::dynamic_pointer_cast<StorageDictionary>(storage); storage_dictionary)
+    if (auto storage_dictionary = std::dynamic_pointer_cast<StorageDictionary>(storage);
+        storage_dictionary && storage_dictionary->getDictionary()->getSpecialKeyType() != DictionarySpecialKeyType::Range)
         table_join->setStorageJoin(std::dynamic_pointer_cast<const IKeyValueEntity>(storage_dictionary->getDictionary()));
     else if (auto storage_key_value = std::dynamic_pointer_cast<IKeyValueEntity>(storage); storage_key_value)
         table_join->setStorageJoin(storage_key_value);
diff --git a/tests/queries/0_stateless/02815_range_dict_no_direct_join.reference b/tests/queries/0_stateless/02815_range_dict_no_direct_join.reference
index 3576991f7a4..051716a791e 100644
--- a/tests/queries/0_stateless/02815_range_dict_no_direct_join.reference
+++ b/tests/queries/0_stateless/02815_range_dict_no_direct_join.reference
@@ -4,3 +4,9 @@
 2	0.4
 3	0.5
 3	0.6
+1	0.1
+1	0.2
+2	0.3
+2	0.4
+3	0.5
+3	0.6
diff --git a/tests/queries/0_stateless/02815_range_dict_no_direct_join.sql b/tests/queries/0_stateless/02815_range_dict_no_direct_join.sql
index f4e5ccccf78..e3af53fa335 100644
--- a/tests/queries/0_stateless/02815_range_dict_no_direct_join.sql
+++ b/tests/queries/0_stateless/02815_range_dict_no_direct_join.sql
@@ -31,3 +31,4 @@ CREATE TABLE ids (id UInt64) ENGINE = Memory;
 INSERT INTO ids SELECT * FROM numbers(10);
 
 SELECT id, amount FROM ids INNER JOIN discounts_dict ON id = advertiser_id ORDER BY id, amount SETTINGS join_algorithm = 'direct';
+SELECT id, amount FROM ids INNER JOIN discounts_dict ON id = advertiser_id ORDER BY id, amount SETTINGS allow_experimental_analyzer = 1;

From 6b00a38aa45d79b72088f608a993579a0fe31d9a Mon Sep 17 00:00:00 2001
From: Dmitry Kardymon <d.kardymon@arenadata.io>
Date: Tue, 18 Jul 2023 10:06:44 +0000
Subject: [PATCH 101/242] Remove SystemLogBase (wip)

---
 src/Common/SystemLogBase.cpp   | 96 +++++++++-------------------------
 src/Common/SystemLogBase.h     | 75 ++++++++------------------
 src/Interpreters/SystemLog.cpp | 62 +++++++++++++++++-----
 src/Interpreters/SystemLog.h   | 38 +++++++++-----
 src/Interpreters/TextLog.cpp   |  2 +-
 src/Interpreters/TextLog.h     |  4 +-
 src/Loggers/Loggers.cpp        |  5 +-
 7 files changed, 127 insertions(+), 155 deletions(-)

diff --git a/src/Common/SystemLogBase.cpp b/src/Common/SystemLogBase.cpp
index c979613b146..dc6a4a08bb7 100644
--- a/src/Common/SystemLogBase.cpp
+++ b/src/Common/SystemLogBase.cpp
@@ -38,46 +38,13 @@ namespace
 
 ISystemLog::~ISystemLog() = default;
 
+
 template <typename LogElement>
-SystemLogBase<LogElement>::SystemLogBase(
+SystemLogQueue<LogElement>::SystemLogQueue(
     const String & name_,
-    std::shared_ptr<SystemLogQueue<LogElement>> queue_)
-    : queue(queue_ ? queue_ : std::make_shared<SystemLogQueue<LogElement>>(name_))
-{
-}
-
-template <typename LogElement>
-void SystemLogBase<LogElement>::stopFlushThread()
-{
-    {
-        std::lock_guard lock(queue->mutex);
-
-        if (!saving_thread || !saving_thread->joinable())
-            return;
-
-        if (is_shutdown)
-            return;
-
-        is_shutdown = true;
-        queue->shutdown();
-
-        /// Tell thread to shutdown.
-        queue->flush_event.notify_all();
-    }
-
-    saving_thread->join();
-}
-
-template <typename LogElement>
-void SystemLogBase<LogElement>::startup()
-{
-    std::lock_guard lock(queue->mutex);
-    saving_thread = std::make_unique<ThreadFromGlobalPool>([this] { savingThreadFunction(); });
-}
-
-template <typename LogElement>
-SystemLogQueue<LogElement>::SystemLogQueue(const String & name_)
+    size_t flush_interval_milliseconds_)
     : log(&Poco::Logger::get(name_))
+    , flush_interval_milliseconds(flush_interval_milliseconds_)
 {}
 
 static thread_local bool recursive_add_call = false;
@@ -149,6 +116,14 @@ void SystemLogQueue<LogElement>::add(const LogElement & element)
         LOG_INFO(log, "Queue is half full for system log '{}'.", demangle(typeid(*this).name()));
 }
 
+template <typename LogElement>
+void SystemLogQueue<LogElement>::shutdown()
+{ 
+    is_shutdown = true;         
+    /// Tell thread to shutdown.
+    flush_event.notify_all();
+}
+
 template <typename LogElement>
 uint64_t SystemLogQueue<LogElement>::notifyFlush(bool force)
 {
@@ -193,12 +168,19 @@ void SystemLogQueue<LogElement>::waitFlush(uint64_t this_thread_requested_offset
     }
 }
 
-constexpr size_t DEFAULT_SYSTEM_LOG_FLUSH_INTERVAL_MILLISECONDS = 7500;
+template <typename LogElement>
+void SystemLogQueue<LogElement>::confirm(uint64_t to_flush_end)
+{
+    std::lock_guard lock(mutex);
+    flushed_up_to = to_flush_end;
+    is_force_prepare_tables = false;
+    flush_event.notify_all();
+}
 
 template <typename LogElement>
 void SystemLogQueue<LogElement>::pop(std::vector<LogElement>& output, uint64_t& to_flush_end, bool& should_prepare_tables_anyway, bool& exit_this_thread)
 {
-    std::unique_lock lock(queue->mutex);
+    std::unique_lock lock(mutex);
     flush_event.wait_for(lock,
         std::chrono::milliseconds(flush_interval_milliseconds),
         [&] ()
@@ -207,47 +189,19 @@ void SystemLogQueue<LogElement>::pop(std::vector<LogElement>& output, uint64_t&
         }
     );
 
-    queue_front_index += queue->size();
-    to_flush_end = queue->queue_front_index;
+    queue_front_index += queue.size();
+    to_flush_end = queue_front_index;
     // Swap with existing array from previous flush, to save memory
     // allocations.
     output.resize(0);
-    queue.swap(to_flush);
+    queue.swap(output);
 
     should_prepare_tables_anyway = is_force_prepare_tables;
 
     exit_this_thread = is_shutdown;
 }
 
-template <typename LogElement>
-void SystemLogBase<LogElement>::add(const LogElement & element)
-{
-    queue->add(element);
-}
-
-template <typename LogElement>
-void SystemLogBase<LogElement>::flush(bool force)
-{
-    uint64_t this_thread_requested_offset = queue->notifyFlush(force);
-    if (this_thread_requested_offset == uint64_t(-1))
-        return;
-
-    queue->waitFlush(this_thread_requested_offset);
-}
-
-template <typename LogElement>
-void SystemLogBase<LogElement>::notifyFlush(bool force) { queue->notifyFlush(force); }
-
-// template <typename LogElement>
-// uint64_t SystemLogBase<LogElement>::notifyFlushImpl(bool force)
-// {
-//     return queue->notifyFlush(force);
-// }
-
-#define INSTANTIATE_SYSTEM_LOG_BASE(ELEMENT) template class SystemLogBase<ELEMENT>;
+#define INSTANTIATE_SYSTEM_LOG_BASE(ELEMENT) template class SystemLogQueue<ELEMENT>;
 SYSTEM_LOG_ELEMENTS(INSTANTIATE_SYSTEM_LOG_BASE)
 
-#define INSTANTIATE_SYSTEM_LOG_BASE2(ELEMENT) template class SystemLogQueue<ELEMENT>;
-SYSTEM_LOG_ELEMENTS(INSTANTIATE_SYSTEM_LOG_BASE2)
-
 }
diff --git a/src/Common/SystemLogBase.h b/src/Common/SystemLogBase.h
index 0c1f32672a9..4cdb07c2cab 100644
--- a/src/Common/SystemLogBase.h
+++ b/src/Common/SystemLogBase.h
@@ -68,22 +68,33 @@ public:
 
 protected:
     std::unique_ptr<ThreadFromGlobalPool> saving_thread;
+
+    bool is_shutdown = false;
 };
 
 template <typename LogElement>
 class SystemLogQueue
 {
 public:
-    SystemLogQueue(const String & name_);
+    SystemLogQueue(
+        const String & name_,
+        size_t flush_interval_milliseconds_);
 
+    // producer methods
     void add(const LogElement & element);
-    size_t size() const { return queue.size(); }
-    //void push_back(const LogElement & element) { queue.push_back(element); }
-    void shutdown() { is_shutdown = true; }
-
+    void shutdown();
     uint64_t notifyFlush(bool force);
     void waitFlush(uint64_t this_thread_requested_offset_);
-    void pop(std::vector<LogElement>& output, uint64_t& to_flush_end, bool& should_prepare_tables_anyway, bool& exit_this_thread)
+
+     // consumer methods
+    void pop(std::vector<LogElement>& output, uint64_t& to_flush_end, bool& should_prepare_tables_anyway, bool& exit_this_thread);
+    void confirm(uint64_t to_flush_end);
+
+    /// Data shared between callers of add()/flush()/shutdown(), and the saving thread
+    std::mutex mutex;
+
+private:
+    Poco::Logger * log;
 
     // Queue is bounded. But its size is quite large to not block in all normal cases.
     std::vector<LogElement> queue;
@@ -92,61 +103,19 @@ public:
     // can wait until a particular message is flushed. This is used to implement
     // synchronous log flushing for SYSTEM FLUSH LOGS.
     uint64_t queue_front_index = 0;
-
-    /// Data shared between callers of add()/flush()/shutdown(), and the saving thread
-    std::mutex mutex;
-    std::condition_variable flush_event;
-
-    // Requested to flush logs up to this index, exclusive
-    uint64_t requested_flush_up_to = 0;
-
     // A flag that says we must create the tables even if the queue is empty.
     bool is_force_prepare_tables = false;
-    
+    // Requested to flush logs up to this index, exclusive
+    uint64_t requested_flush_up_to = 0;
+    std::condition_variable flush_event;
     // Flushed log up to this index, exclusive
     uint64_t flushed_up_to = 0;
-private:
-    Poco::Logger * log;
-    bool is_shutdown = false;
     // Logged overflow message at this queue front index
     uint64_t logged_queue_full_at_index = -1;
-};
-
-template <typename LogElement>
-class SystemLogBase : public ISystemLog
-{
-public:
-    using Self = SystemLogBase;
-
-    SystemLogBase(
-        const String & name_,
-        std::shared_ptr<SystemLogQueue<LogElement>> queue_ = nullptr);
-
-    /** Append a record into log.
-      * Writing to table will be done asynchronously and in case of failure, record could be lost.
-      */
-    void add(const LogElement & element);
-
-    /// Flush data in the buffer to disk. Block the thread until the data is stored on disk.
-    void flush(bool force) override;
-
-    void startup() override;
-
-     void stopFlushThread() override;
-
-    /// Non-blocking flush data in the buffer to disk.
-    void notifyFlush(bool force);
-
-    String getName() const override { return LogElement::name(); }
-
-    static const char * getDefaultOrderBy() { return "event_date, event_time"; }
-
-protected:
-    Poco::Logger * log;
-
-    std::shared_ptr<SystemLogQueue<LogElement>> queue;
 
     bool is_shutdown = false;
+
+    const size_t flush_interval_milliseconds;
 };
 
 }
diff --git a/src/Interpreters/SystemLog.cpp b/src/Interpreters/SystemLog.cpp
index 983c9f87c45..cb4578689a2 100644
--- a/src/Interpreters/SystemLog.cpp
+++ b/src/Interpreters/SystemLog.cpp
@@ -334,15 +334,21 @@ SystemLog<LogElement>::SystemLog(
     const String & storage_def_,
     size_t flush_interval_milliseconds_,
     std::shared_ptr<SystemLogQueue<LogElement>> queue_)
-    : Base(database_name_ + "." + table_name_, queue_)
-    , WithContext(context_)
+    : WithContext(context_)
+    , log(&Poco::Logger::get("SystemLog (" + database_name_ + "." + table_name_ + ")"))
     , table_id(database_name_, table_name_)
     , storage_def(storage_def_)
     , create_query(serializeAST(*getCreateTableQuery()))
-    , flush_interval_milliseconds(flush_interval_milliseconds_)
+    , queue(queue_ ? queue_ : std::make_shared<SystemLogQueue<LogElement>>(database_name_ + "." + table_name_, flush_interval_milliseconds_))
 {
     assert(database_name_ == DatabaseCatalog::SYSTEM_DATABASE);
-    log = &Poco::Logger::get("SystemLog (" + database_name_ + "." + table_name_ + ")");
+}
+
+template <typename LogElement>
+void SystemLog<LogElement>::startup()
+{
+    std::lock_guard lock(queue->mutex);
+    saving_thread = std::make_unique<ThreadFromGlobalPool>([this] { savingThreadFunction(); });
 }
 
 template <typename LogElement>
@@ -355,6 +361,26 @@ void SystemLog<LogElement>::shutdown()
         table->flushAndShutdown();
 }
 
+template <typename LogElement>
+void SystemLog<LogElement>::stopFlushThread()
+{
+    {
+        std::lock_guard lock(queue->mutex);
+
+        if (!saving_thread || !saving_thread->joinable())
+            return;
+
+        if (is_shutdown)
+            return;
+
+        is_shutdown = true;
+        queue->shutdown();
+    }
+
+    saving_thread->join();
+}
+
+
 template <typename LogElement>
 void SystemLog<LogElement>::savingThreadFunction()
 {
@@ -381,9 +407,7 @@ void SystemLog<LogElement>::savingThreadFunction()
                     prepareTable();
                     LOG_TRACE(log, "Table created (force)");
 
-                    std::lock_guard lock(queue->mutex);
-                     queue->is_force_prepare_tables = false;
-                    queue->flush_event.notify_all();
+                    queue->confirm(to_flush_end);
                 }
             }
             else
@@ -455,12 +479,7 @@ void SystemLog<LogElement>::flushImpl(const std::vector<LogElement> & to_flush,
         tryLogCurrentException(__PRETTY_FUNCTION__);
     }
 
-    {
-        std::lock_guard lock(queue->mutex);
-        queue->flushed_up_to = to_flush_end;
-        queue->is_force_prepare_tables = false;
-        queue->flush_event.notify_all();
-    }
+    queue->confirm(to_flush_end);
 
     LOG_TRACE(log, "Flushed system log up to offset {}", to_flush_end);
 }
@@ -599,7 +618,24 @@ ASTPtr SystemLog<LogElement>::getCreateTableQuery()
 
     return create;
 }
+template <typename LogElement>
+void SystemLog<LogElement>::add(const LogElement & element)
+{
+    queue->add(element);
+}
 
+template <typename LogElement>
+void SystemLog<LogElement>::flush(bool force)
+{
+    uint64_t this_thread_requested_offset = queue->notifyFlush(force);
+    if (this_thread_requested_offset == uint64_t(-1))
+        return;
+
+    queue->waitFlush(this_thread_requested_offset);
+}
+
+template <typename LogElement>
+void SystemLog<LogElement>::notifyFlush(bool force) { queue->notifyFlush(force); }
 
 #define INSTANTIATE_SYSTEM_LOG(ELEMENT) template class SystemLog<ELEMENT>;
 SYSTEM_LOG_ELEMENTS(INSTANTIATE_SYSTEM_LOG)
diff --git a/src/Interpreters/SystemLog.h b/src/Interpreters/SystemLog.h
index c730aa23319..fe9e4aa35d2 100644
--- a/src/Interpreters/SystemLog.h
+++ b/src/Interpreters/SystemLog.h
@@ -89,11 +89,10 @@ struct SystemLogs
 
 
 template <typename LogElement>
-class SystemLog : public SystemLogBase<LogElement>, private boost::noncopyable, WithContext
+class SystemLog : public ISystemLog, private boost::noncopyable, WithContext
 {
 public:
     using Self = SystemLog;
-    using Base = SystemLogBase<LogElement>;
 
     /** Parameter: table name where to write log.
       * If table is not exists, then it get created with specified engine.
@@ -111,30 +110,41 @@ public:
         size_t flush_interval_milliseconds_,
         std::shared_ptr<SystemLogQueue<LogElement>> queue_ = nullptr);
 
+    void startup() override;
+    /** Append a record into log.
+      * Writing to table will be done asynchronously and in case of failure, record could be lost.
+      */
+    void add(const LogElement & element);
+
     void shutdown() override;
 
+    String getName() const override { return LogElement::name(); }
+    static const char * getDefaultOrderBy() { return "event_date, event_time"; }
+
+    /// Flush data in the buffer to disk. Block the thread until the data is stored on disk.
+    void flush(bool force) override;
+
+    /// Non-blocking flush data in the buffer to disk.
+    void notifyFlush(bool force);
+
+    void stopFlushThread() override;
+  
 protected:
-    //using ISystemLog::mutex;
-    using Base::is_shutdown;
-   // using ISystemLog::flush_event;
-    using Base::stopFlushThread;
-    using Base::log;
-    using Base::queue;
-   // using Base::queue_front_index;
- //   using Base::is_force_prepare_tables;
-    //using Base::requested_flush_up_to;
-  //  using Base::flushed_up_to;
- //   using Base::logged_queue_full_at_index;
+    Poco::Logger * log;
+  
+    using ISystemLog::is_shutdown;
+    using ISystemLog::saving_thread;
 
 private:
 
+
     /* Saving thread data */
     const StorageID table_id;
     const String storage_def;
     String create_query;
     String old_create_query;
     bool is_prepared = false;
-    const size_t flush_interval_milliseconds;
+    std::shared_ptr<SystemLogQueue<LogElement>> queue;
 
     /** Creates new table if it does not exist.
       * Renames old table if its structure is not suitable.
diff --git a/src/Interpreters/TextLog.cpp b/src/Interpreters/TextLog.cpp
index a66092c1c2b..108135c78b3 100644
--- a/src/Interpreters/TextLog.cpp
+++ b/src/Interpreters/TextLog.cpp
@@ -84,7 +84,7 @@ TextLog::TextLog(ContextPtr context_, const String & database_name_,
         const String & table_name_, const String & storage_def_,
         size_t flush_interval_milliseconds_)
   : SystemLog<TextLogElement>(context_, database_name_, table_name_,
-        storage_def_, flush_interval_milliseconds_, getLogQueue())
+        storage_def_, flush_interval_milliseconds_, getLogQueue(flush_interval_milliseconds_))
 {
     // SystemLog methods may write text logs, so we disable logging for the text
     // log table to avoid recursion.
diff --git a/src/Interpreters/TextLog.h b/src/Interpreters/TextLog.h
index 8390259e147..0febce03abc 100644
--- a/src/Interpreters/TextLog.h
+++ b/src/Interpreters/TextLog.h
@@ -47,9 +47,9 @@ public:
         const String & storage_def_,
         size_t flush_interval_milliseconds_);
 
-    static std::shared_ptr<SystemLogQueue<TextLogElement>> getLogQueue()
+    static std::shared_ptr<SystemLogQueue<TextLogElement>> getLogQueue(size_t flush_interval_milliseconds)
     {
-        static std::shared_ptr<SystemLogQueue<TextLogElement>> queue = std::make_shared<SystemLogQueue<TextLogElement>>("text_log");
+        static std::shared_ptr<SystemLogQueue<TextLogElement>> queue = std::make_shared<SystemLogQueue<TextLogElement>>("text_log", flush_interval_milliseconds);
         return queue;
     }
 };
diff --git a/src/Loggers/Loggers.cpp b/src/Loggers/Loggers.cpp
index fa143440cc2..a9869847f65 100644
--- a/src/Loggers/Loggers.cpp
+++ b/src/Loggers/Loggers.cpp
@@ -34,6 +34,7 @@ static std::string createDirectory(const std::string & file)
     return path;
 }
 
+constexpr size_t DEFAULT_SYSTEM_LOG_FLUSH_INTERVAL_MILLISECONDS = 7500;
 
 void Loggers::buildLoggers(Poco::Util::AbstractConfiguration & config, Poco::Logger & logger /*_root*/, const std::string & cmd_name)
 {
@@ -254,7 +255,9 @@ void Loggers::buildLoggers(Poco::Util::AbstractConfiguration & config, Poco::Log
     {
         String text_log_level_str = config.getString("text_log.level", "");
         int text_log_level = text_log_level_str.empty() ? INT_MAX : Poco::Logger::parseLevel(text_log_level_str);
-        split->addTextLog(DB::TextLog::getLogQueue(), text_log_level);
+        size_t flush_interval_milliseconds = config.getUInt64("text_log.flush_interval_milliseconds",
+                                                        DEFAULT_SYSTEM_LOG_FLUSH_INTERVAL_MILLISECONDS);
+        split->addTextLog(DB::TextLog::getLogQueue(flush_interval_milliseconds), text_log_level);
     }
 #endif
 }

From ed60bb90b28e9b325031db6cfcf438fa6a7ca51b Mon Sep 17 00:00:00 2001
From: Dmitry Kardymon <d.kardymon@arenadata.io>
Date: Tue, 18 Jul 2023 10:22:07 +0000
Subject: [PATCH 102/242] Move code to be more beautiful in PR

---
 src/Common/SystemLogBase.cpp | 51 ++++++++++++++++++------------------
 1 file changed, 26 insertions(+), 25 deletions(-)

diff --git a/src/Common/SystemLogBase.cpp b/src/Common/SystemLogBase.cpp
index dc6a4a08bb7..38d675e4b5f 100644
--- a/src/Common/SystemLogBase.cpp
+++ b/src/Common/SystemLogBase.cpp
@@ -58,7 +58,8 @@ void SystemLogQueue<LogElement>::add(const LogElement & element)
         return;
     recursive_add_call = true;
     SCOPE_EXIT({ recursive_add_call = false; });
-     /// Memory can be allocated while resizing on queue.push_back.
+
+    /// Memory can be allocated while resizing on queue.push_back.
     /// The size of allocation can be in order of a few megabytes.
     /// But this should not be accounted for query memory usage.
     /// Otherwise the tests like 01017_uniqCombined_memory_usage.sql will be flacky.
@@ -124,30 +125,6 @@ void SystemLogQueue<LogElement>::shutdown()
     flush_event.notify_all();
 }
 
-template <typename LogElement>
-uint64_t SystemLogQueue<LogElement>::notifyFlush(bool force)
-{
-    uint64_t this_thread_requested_offset;
-
-    {
-        std::lock_guard lock(mutex);
-        if (is_shutdown)
-            return uint64_t(-1);
-
-        this_thread_requested_offset = queue_front_index + queue.size();
-
-        // Publish our flush request, taking care not to overwrite the requests
-        // made by other threads.
-        is_force_prepare_tables |= force;
-        requested_flush_up_to = std::max(requested_flush_up_to, this_thread_requested_offset);
-
-        flush_event.notify_all();
-    }
-
-    LOG_DEBUG(log, "Requested flush up to offset {}", this_thread_requested_offset);
-    return this_thread_requested_offset;
-}
-
 template <typename LogElement>
 void SystemLogQueue<LogElement>::waitFlush(uint64_t this_thread_requested_offset_)
 {
@@ -201,6 +178,30 @@ void SystemLogQueue<LogElement>::pop(std::vector<LogElement>& output, uint64_t&
     exit_this_thread = is_shutdown;
 }
 
+template <typename LogElement>
+uint64_t SystemLogQueue<LogElement>::notifyFlush(bool force)
+{
+    uint64_t this_thread_requested_offset;
+
+    {
+        std::lock_guard lock(mutex);
+        if (is_shutdown)
+            return uint64_t(-1);
+
+        this_thread_requested_offset = queue_front_index + queue.size();
+
+        // Publish our flush request, taking care not to overwrite the requests
+        // made by other threads.
+        is_force_prepare_tables |= force;
+        requested_flush_up_to = std::max(requested_flush_up_to, this_thread_requested_offset);
+
+        flush_event.notify_all();
+    }
+
+    LOG_DEBUG(log, "Requested flush up to offset {}", this_thread_requested_offset);
+    return this_thread_requested_offset;
+}
+
 #define INSTANTIATE_SYSTEM_LOG_BASE(ELEMENT) template class SystemLogQueue<ELEMENT>;
 SYSTEM_LOG_ELEMENTS(INSTANTIATE_SYSTEM_LOG_BASE)
 

From 92ca2b0bdd4e2160831d14f1c855a55dbac1a108 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Tue, 18 Jul 2023 11:34:28 +0200
Subject: [PATCH 103/242] Make psql tests in test_odbc_interaction more robust
 to other test failures

Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com>
---
 .../integration/test_odbc_interaction/test.py | 295 +++++++++---------
 1 file changed, 155 insertions(+), 140 deletions(-)

diff --git a/tests/integration/test_odbc_interaction/test.py b/tests/integration/test_odbc_interaction/test.py
index 14f5de17870..e8b3ba3fcf3 100644
--- a/tests/integration/test_odbc_interaction/test.py
+++ b/tests/integration/test_odbc_interaction/test.py
@@ -582,75 +582,83 @@ def test_sqlite_odbc_cached_dictionary(started_cluster):
 def test_postgres_odbc_hashed_dictionary_with_schema(started_cluster):
     skip_test_msan(node1)
 
-    conn = get_postgres_conn(started_cluster)
-    cursor = conn.cursor()
-    cursor.execute(
-        "insert into clickhouse.test_table values(1, 1, 'hello'),(2, 2, 'world')"
-    )
-    node1.query("SYSTEM RELOAD DICTIONARY postgres_odbc_hashed")
-    node1.exec_in_container(
-        ["ss", "-K", "dport", "postgresql"], privileged=True, user="root"
-    )
-    node1.query("SYSTEM RELOAD DICTIONARY postgres_odbc_hashed")
-    assert_eq_with_retry(
-        node1,
-        "select dictGetString('postgres_odbc_hashed', 'column2', toUInt64(1))",
-        "hello",
-    )
-    assert_eq_with_retry(
-        node1,
-        "select dictGetString('postgres_odbc_hashed', 'column2', toUInt64(2))",
-        "world",
-    )
-    cursor.execute("truncate table clickhouse.test_table")
+    try:
+        conn = get_postgres_conn(started_cluster)
+        cursor = conn.cursor()
+        cursor.execute(
+            "insert into clickhouse.test_table values(1, 1, 'hello'),(2, 2, 'world')"
+        )
+        node1.query("SYSTEM RELOAD DICTIONARY postgres_odbc_hashed")
+        node1.exec_in_container(
+            ["ss", "-K", "dport", "postgresql"], privileged=True, user="root"
+        )
+        node1.query("SYSTEM RELOAD DICTIONARY postgres_odbc_hashed")
+        assert_eq_with_retry(
+            node1,
+            "select dictGetString('postgres_odbc_hashed', 'column2', toUInt64(1))",
+            "hello",
+        )
+        assert_eq_with_retry(
+            node1,
+            "select dictGetString('postgres_odbc_hashed', 'column2', toUInt64(2))",
+            "world",
+        )
+    finally:
+        cursor.execute("truncate table clickhouse.test_table")
 
 
 def test_postgres_odbc_hashed_dictionary_no_tty_pipe_overflow(started_cluster):
     skip_test_msan(node1)
 
-    conn = get_postgres_conn(started_cluster)
-    cursor = conn.cursor()
-    cursor.execute("insert into clickhouse.test_table values(3, 3, 'xxx')")
-    for i in range(100):
-        try:
-            node1.query("system reload dictionary postgres_odbc_hashed", timeout=15)
-        except Exception as ex:
-            assert False, "Exception occured -- odbc-bridge hangs: " + str(ex)
+    try:
+        conn = get_postgres_conn(started_cluster)
+        cursor = conn.cursor()
+        cursor.execute("insert into clickhouse.test_table values(3, 3, 'xxx')")
+        for i in range(100):
+            try:
+                node1.query("system reload dictionary postgres_odbc_hashed", timeout=15)
+            except Exception as ex:
+                assert False, "Exception occured -- odbc-bridge hangs: " + str(ex)
 
-    assert_eq_with_retry(
-        node1,
-        "select dictGetString('postgres_odbc_hashed', 'column2', toUInt64(3))",
-        "xxx",
-    )
-    cursor.execute("truncate table clickhouse.test_table")
+        assert_eq_with_retry(
+            node1,
+            "select dictGetString('postgres_odbc_hashed', 'column2', toUInt64(3))",
+            "xxx",
+        )
+    finally:
+        cursor.execute("truncate table clickhouse.test_table")
 
 
 def test_no_connection_pooling(started_cluster):
     skip_test_msan(node1)
 
-    conn = get_postgres_conn(started_cluster)
-    cursor = conn.cursor()
-    cursor.execute(
-        "insert into clickhouse.test_table values(1, 1, 'hello'),(2, 2, 'world')"
-    )
-    node1.exec_in_container(["ss", "-K", "dport", "5432"], privileged=True, user="root")
-    node1.query("SYSTEM RELOAD DICTIONARY postgres_odbc_nopool")
-    assert_eq_with_retry(
-        node1,
-        "select dictGetString('postgres_odbc_nopool', 'column2', toUInt64(1))",
-        "hello",
-    )
-    assert_eq_with_retry(
-        node1,
-        "select dictGetString('postgres_odbc_nopool', 'column2', toUInt64(2))",
-        "world",
-    )
+    try:
+        conn = get_postgres_conn(started_cluster)
+        cursor = conn.cursor()
+        cursor.execute(
+            "insert into clickhouse.test_table values(1, 1, 'hello'),(2, 2, 'world')"
+        )
+        node1.exec_in_container(
+            ["ss", "-K", "dport", "5432"], privileged=True, user="root"
+        )
+        node1.query("SYSTEM RELOAD DICTIONARY postgres_odbc_nopool")
+        assert_eq_with_retry(
+            node1,
+            "select dictGetString('postgres_odbc_nopool', 'column2', toUInt64(1))",
+            "hello",
+        )
+        assert_eq_with_retry(
+            node1,
+            "select dictGetString('postgres_odbc_nopool', 'column2', toUInt64(2))",
+            "world",
+        )
 
-    # No open connections should be left because we don't use connection pooling.
-    assert "" == node1.exec_in_container(
-        ["ss", "-H", "dport", "5432"], privileged=True, user="root"
-    )
-    cursor.execute("truncate table clickhouse.test_table")
+        # No open connections should be left because we don't use connection pooling.
+        assert "" == node1.exec_in_container(
+            ["ss", "-H", "dport", "5432"], privileged=True, user="root"
+        )
+    finally:
+        cursor.execute("truncate table clickhouse.test_table")
 
 
 def test_postgres_insert(started_cluster):
@@ -662,112 +670,119 @@ def test_postgres_insert(started_cluster):
     # postgres .yml file). This is needed to check parsing, validation and
     # reconstruction of connection string.
 
-    node1.query(
-        "create table pg_insert (id UInt64, column1 UInt8, column2 String) engine=ODBC('DSN=postgresql_odbc;Servername=postgre-sql.local', 'clickhouse', 'test_table')"
-    )
-    node1.query("insert into pg_insert values (1, 1, 'hello'), (2, 2, 'world')")
-    assert node1.query("select * from pg_insert") == "1\t1\thello\n2\t2\tworld\n"
-    node1.query(
-        "insert into table function odbc('DSN=postgresql_odbc', 'clickhouse', 'test_table') format CSV 3,3,test"
-    )
-    node1.query(
-        "insert into table function odbc('DSN=postgresql_odbc;Servername=postgre-sql.local', 'clickhouse', 'test_table')"
-        " select number, number, 's' || toString(number) from numbers (4, 7)"
-    )
-    assert (
-        node1.query("select sum(column1), count(column1) from pg_insert") == "55\t10\n"
-    )
-    assert (
+    try:
         node1.query(
-            "select sum(n), count(n) from (select (*,).1 as n from (select * from odbc('DSN=postgresql_odbc', 'clickhouse', 'test_table')))"
+            "create table pg_insert (id UInt64, column1 UInt8, column2 String) engine=ODBC('DSN=postgresql_odbc;Servername=postgre-sql.local', 'clickhouse', 'test_table')"
         )
-        == "55\t10\n"
-    )
-    node1.query("DROP TABLE pg_insert")
-    conn.cursor().execute("truncate table clickhouse.test_table")
+        node1.query("insert into pg_insert values (1, 1, 'hello'), (2, 2, 'world')")
+        assert node1.query("select * from pg_insert") == "1\t1\thello\n2\t2\tworld\n"
+        node1.query(
+            "insert into table function odbc('DSN=postgresql_odbc', 'clickhouse', 'test_table') format CSV 3,3,test"
+        )
+        node1.query(
+            "insert into table function odbc('DSN=postgresql_odbc;Servername=postgre-sql.local', 'clickhouse', 'test_table')"
+            " select number, number, 's' || toString(number) from numbers (4, 7)"
+        )
+        assert (
+            node1.query("select sum(column1), count(column1) from pg_insert")
+            == "55\t10\n"
+        )
+        assert (
+            node1.query(
+                "select sum(n), count(n) from (select (*,).1 as n from (select * from odbc('DSN=postgresql_odbc', 'clickhouse', 'test_table')))"
+            )
+            == "55\t10\n"
+        )
+    finally:
+        node1.query("DROP TABLE IF EXISTS pg_insert")
+        conn.cursor().execute("truncate table clickhouse.test_table")
 
 
 def test_odbc_postgres_date_data_type(started_cluster):
     skip_test_msan(node1)
 
-    conn = get_postgres_conn(started_cluster)
-    cursor = conn.cursor()
-    cursor.execute(
-        "CREATE TABLE clickhouse.test_date (id integer, column1 integer, column2 date)"
-    )
+    try:
+        conn = get_postgres_conn(started_cluster)
+        cursor = conn.cursor()
+        cursor.execute(
+            "CREATE TABLE clickhouse.test_date (id integer, column1 integer, column2 date)"
+        )
 
-    cursor.execute("INSERT INTO clickhouse.test_date VALUES (1, 1, '2020-12-01')")
-    cursor.execute("INSERT INTO clickhouse.test_date VALUES (2, 2, '2020-12-02')")
-    cursor.execute("INSERT INTO clickhouse.test_date VALUES (3, 3, '2020-12-03')")
-    conn.commit()
+        cursor.execute("INSERT INTO clickhouse.test_date VALUES (1, 1, '2020-12-01')")
+        cursor.execute("INSERT INTO clickhouse.test_date VALUES (2, 2, '2020-12-02')")
+        cursor.execute("INSERT INTO clickhouse.test_date VALUES (3, 3, '2020-12-03')")
+        conn.commit()
 
-    node1.query(
-        """
-        CREATE TABLE test_date (id UInt64, column1 UInt64, column2 Date)
-        ENGINE=ODBC('DSN=postgresql_odbc; Servername=postgre-sql.local', 'clickhouse', 'test_date')"""
-    )
+        node1.query(
+            """
+            CREATE TABLE test_date (id UInt64, column1 UInt64, column2 Date)
+            ENGINE=ODBC('DSN=postgresql_odbc; Servername=postgre-sql.local', 'clickhouse', 'test_date')"""
+        )
 
-    expected = "1\t1\t2020-12-01\n2\t2\t2020-12-02\n3\t3\t2020-12-03\n"
-    result = node1.query("SELECT * FROM test_date")
-    assert result == expected
-    cursor.execute("DROP TABLE clickhouse.test_date")
-    node1.query("DROP TABLE test_date")
+        expected = "1\t1\t2020-12-01\n2\t2\t2020-12-02\n3\t3\t2020-12-03\n"
+        result = node1.query("SELECT * FROM test_date")
+        assert result == expected
+    finally:
+        cursor.execute("DROP TABLE clickhouse.test_date")
+        node1.query("DROP TABLE IF EXISTS test_date")
 
 
 def test_odbc_postgres_conversions(started_cluster):
     skip_test_msan(node1)
 
-    conn = get_postgres_conn(started_cluster)
-    cursor = conn.cursor()
+    try:
+        conn = get_postgres_conn(started_cluster)
+        cursor = conn.cursor()
 
-    cursor.execute(
-        """CREATE TABLE clickhouse.test_types (
-        a smallint, b integer, c bigint, d real, e double precision, f serial, g bigserial,
-        h timestamp)"""
-    )
+        cursor.execute(
+            """CREATE TABLE clickhouse.test_types (
+            a smallint, b integer, c bigint, d real, e double precision, f serial, g bigserial,
+            h timestamp)"""
+        )
 
-    node1.query(
-        """
-        INSERT INTO TABLE FUNCTION
-        odbc('DSN=postgresql_odbc; Servername=postgre-sql.local', 'clickhouse', 'test_types')
-        VALUES (-32768, -2147483648, -9223372036854775808, 1.12345, 1.1234567890, 2147483647, 9223372036854775807, '2000-05-12 12:12:12')"""
-    )
+        node1.query(
+            """
+            INSERT INTO TABLE FUNCTION
+            odbc('DSN=postgresql_odbc; Servername=postgre-sql.local', 'clickhouse', 'test_types')
+            VALUES (-32768, -2147483648, -9223372036854775808, 1.12345, 1.1234567890, 2147483647, 9223372036854775807, '2000-05-12 12:12:12')"""
+        )
 
-    result = node1.query(
-        """
-        SELECT a, b, c, d, e, f, g, h
-        FROM odbc('DSN=postgresql_odbc; Servername=postgre-sql.local', 'clickhouse', 'test_types')
-        """
-    )
+        result = node1.query(
+            """
+            SELECT a, b, c, d, e, f, g, h
+            FROM odbc('DSN=postgresql_odbc; Servername=postgre-sql.local', 'clickhouse', 'test_types')
+            """
+        )
 
-    assert (
-        result
-        == "-32768\t-2147483648\t-9223372036854775808\t1.12345\t1.123456789\t2147483647\t9223372036854775807\t2000-05-12 12:12:12\n"
-    )
-    cursor.execute("DROP TABLE IF EXISTS clickhouse.test_types")
+        assert (
+            result
+            == "-32768\t-2147483648\t-9223372036854775808\t1.12345\t1.123456789\t2147483647\t9223372036854775807\t2000-05-12 12:12:12\n"
+        )
+        cursor.execute("DROP TABLE IF EXISTS clickhouse.test_types")
 
-    cursor.execute(
-        """CREATE TABLE clickhouse.test_types (column1 Timestamp, column2 Numeric)"""
-    )
+        cursor.execute(
+            """CREATE TABLE clickhouse.test_types (column1 Timestamp, column2 Numeric)"""
+        )
 
-    node1.query(
-        """
-        CREATE TABLE test_types (column1 DateTime64, column2 Decimal(5, 1))
-        ENGINE=ODBC('DSN=postgresql_odbc; Servername=postgre-sql.local', 'clickhouse', 'test_types')"""
-    )
+        node1.query(
+            """
+            CREATE TABLE test_types (column1 DateTime64, column2 Decimal(5, 1))
+            ENGINE=ODBC('DSN=postgresql_odbc; Servername=postgre-sql.local', 'clickhouse', 'test_types')"""
+        )
 
-    node1.query(
-        """INSERT INTO test_types
-        SELECT toDateTime64('2019-01-01 00:00:00', 3, 'Etc/UTC'), toDecimal32(1.1, 1)"""
-    )
+        node1.query(
+            """INSERT INTO test_types
+            SELECT toDateTime64('2019-01-01 00:00:00', 3, 'Etc/UTC'), toDecimal32(1.1, 1)"""
+        )
 
-    expected = node1.query(
-        "SELECT toDateTime64('2019-01-01 00:00:00', 3, 'Etc/UTC'), toDecimal32(1.1, 1)"
-    )
-    result = node1.query("SELECT * FROM test_types")
-    cursor.execute("DROP TABLE clickhouse.test_types")
-    node1.query("DROP TABLE test_types")
-    assert result == expected
+        expected = node1.query(
+            "SELECT toDateTime64('2019-01-01 00:00:00', 3, 'Etc/UTC'), toDecimal32(1.1, 1)"
+        )
+        result = node1.query("SELECT * FROM test_types")
+        assert result == expected
+    finally:
+        cursor.execute("DROP TABLE IF EXISTS clickhouse.test_types")
+        node1.query("DROP TABLE IF EXISTS test_types")
 
 
 def test_odbc_cyrillic_with_varchar(started_cluster):

From baf6d182d63dddf9dfb42d687d21ed2835f536a2 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Tue, 18 Jul 2023 11:49:34 +0200
Subject: [PATCH 104/242] Fix reporting some BROKEN tests as ERROR

Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com>
---
 tests/integration/ci-runner.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/integration/ci-runner.py b/tests/integration/ci-runner.py
index f2adea78da7..a6e9716dc20 100755
--- a/tests/integration/ci-runner.py
+++ b/tests/integration/ci-runner.py
@@ -788,6 +788,7 @@ class ClickhouseIntegrationTestsRunner:
                     and test not in counters["ERROR"]
                     and test not in counters["SKIPPED"]
                     and test not in counters["FAILED"]
+                    and test not in counters["BROKEN"]
                     and "::" in test
                 ):
                     counters["ERROR"].append(test)

From 7466ef58376810d7a81a1e48ac14937d336cd97f Mon Sep 17 00:00:00 2001
From: Dmitry Kardymon <d.kardymon@arenadata.io>
Date: Tue, 18 Jul 2023 13:05:20 +0000
Subject: [PATCH 105/242] Add index

---
 src/Common/SystemLogBase.cpp    | 70 ++++++++++++++++-----------------
 src/Common/SystemLogBase.h      | 27 +++++++------
 src/Interpreters/SystemLog.cpp  |  4 +-
 src/Loggers/OwnSplitChannel.cpp |  2 +-
 4 files changed, 53 insertions(+), 50 deletions(-)

diff --git a/src/Common/SystemLogBase.cpp b/src/Common/SystemLogBase.cpp
index 38d675e4b5f..e9442617acd 100644
--- a/src/Common/SystemLogBase.cpp
+++ b/src/Common/SystemLogBase.cpp
@@ -47,17 +47,17 @@ SystemLogQueue<LogElement>::SystemLogQueue(
     , flush_interval_milliseconds(flush_interval_milliseconds_)
 {}
 
-static thread_local bool recursive_add_call = false;
+static thread_local bool recursive_push_call = false;
 
 template <typename LogElement>
-void SystemLogQueue<LogElement>::add(const LogElement & element)
+void SystemLogQueue<LogElement>::push(const LogElement & element)
 {
     /// It is possible that the method will be called recursively.
     /// Better to drop these events to avoid complications.
-    if (recursive_add_call)
+    if (recursive_push_call)
         return;
-    recursive_add_call = true;
-    SCOPE_EXIT({ recursive_add_call = false; });
+    recursive_push_call = true;
+    SCOPE_EXIT({ recursive_push_call = false; });
 
     /// Memory can be allocated while resizing on queue.push_back.
     /// The size of allocation can be in order of a few megabytes.
@@ -118,15 +118,31 @@ void SystemLogQueue<LogElement>::add(const LogElement & element)
 }
 
 template <typename LogElement>
-void SystemLogQueue<LogElement>::shutdown()
-{ 
-    is_shutdown = true;         
-    /// Tell thread to shutdown.
-    flush_event.notify_all();
+uint64_t SystemLogQueue<LogElement>::notifyFlush(bool should_prepare_tables_anyway)
+{
+    uint64_t this_thread_requested_offset;
+
+    {
+        std::lock_guard lock(mutex);
+        if (is_shutdown)
+            return uint64_t(-1);
+
+        this_thread_requested_offset = queue_front_index + queue.size();
+
+        // Publish our flush request, taking care not to overwrite the requests
+        // made by other threads.
+        is_force_prepare_tables |= should_prepare_tables_anyway;
+        requested_flush_up_to = std::max(requested_flush_up_to, this_thread_requested_offset);
+
+        flush_event.notify_all();
+    }
+
+    LOG_DEBUG(log, "Requested flush up to offset {}", this_thread_requested_offset);
+    return this_thread_requested_offset;
 }
 
 template <typename LogElement>
-void SystemLogQueue<LogElement>::waitFlush(uint64_t this_thread_requested_offset_)
+void SystemLogQueue<LogElement>::waitFlush(uint64_t expected_flushed_up_to)
 {
     // Use an arbitrary timeout to avoid endless waiting. 60s proved to be
     // too fast for our parallel functional tests, probably because they
@@ -135,7 +151,7 @@ void SystemLogQueue<LogElement>::waitFlush(uint64_t this_thread_requested_offset
     std::unique_lock lock(mutex);
     bool result = flush_event.wait_for(lock, std::chrono::seconds(timeout_seconds), [&]
     {
-        return flushed_up_to >= this_thread_requested_offset_ && !is_force_prepare_tables;
+        return flushed_up_to >= expected_flushed_up_to && !is_force_prepare_tables;
     });
 
     if (!result)
@@ -155,7 +171,7 @@ void SystemLogQueue<LogElement>::confirm(uint64_t to_flush_end)
 }
 
 template <typename LogElement>
-void SystemLogQueue<LogElement>::pop(std::vector<LogElement>& output, uint64_t& to_flush_end, bool& should_prepare_tables_anyway, bool& exit_this_thread)
+SystemLogQueue<LogElement>::Index SystemLogQueue<LogElement>::pop(std::vector<LogElement>& output, bool& should_prepare_tables_anyway, bool& exit_this_thread)
 {
     std::unique_lock lock(mutex);
     flush_event.wait_for(lock,
@@ -167,7 +183,6 @@ void SystemLogQueue<LogElement>::pop(std::vector<LogElement>& output, uint64_t&
     );
 
     queue_front_index += queue.size();
-    to_flush_end = queue_front_index;
     // Swap with existing array from previous flush, to save memory
     // allocations.
     output.resize(0);
@@ -176,30 +191,15 @@ void SystemLogQueue<LogElement>::pop(std::vector<LogElement>& output, uint64_t&
     should_prepare_tables_anyway = is_force_prepare_tables;
 
     exit_this_thread = is_shutdown;
+    return queue_front_index;
 }
 
 template <typename LogElement>
-uint64_t SystemLogQueue<LogElement>::notifyFlush(bool force)
-{
-    uint64_t this_thread_requested_offset;
-
-    {
-        std::lock_guard lock(mutex);
-        if (is_shutdown)
-            return uint64_t(-1);
-
-        this_thread_requested_offset = queue_front_index + queue.size();
-
-        // Publish our flush request, taking care not to overwrite the requests
-        // made by other threads.
-        is_force_prepare_tables |= force;
-        requested_flush_up_to = std::max(requested_flush_up_to, this_thread_requested_offset);
-
-        flush_event.notify_all();
-    }
-
-    LOG_DEBUG(log, "Requested flush up to offset {}", this_thread_requested_offset);
-    return this_thread_requested_offset;
+void SystemLogQueue<LogElement>::shutdown()
+{ 
+    is_shutdown = true;         
+    /// Tell thread to shutdown.
+    flush_event.notify_all();
 }
 
 #define INSTANTIATE_SYSTEM_LOG_BASE(ELEMENT) template class SystemLogQueue<ELEMENT>;
diff --git a/src/Common/SystemLogBase.h b/src/Common/SystemLogBase.h
index 4cdb07c2cab..281cd06354a 100644
--- a/src/Common/SystemLogBase.h
+++ b/src/Common/SystemLogBase.h
@@ -75,20 +75,23 @@ protected:
 template <typename LogElement>
 class SystemLogQueue
 {
+    using Index = uint64_t;
+
 public:
     SystemLogQueue(
         const String & name_,
         size_t flush_interval_milliseconds_);
 
-    // producer methods
-    void add(const LogElement & element);
     void shutdown();
-    uint64_t notifyFlush(bool force);
-    void waitFlush(uint64_t this_thread_requested_offset_);
+
+    // producer methods
+    void push(const LogElement & element);
+    Index notifyFlush(bool should_prepare_tables_anyway);
+    void waitFlush(Index expected_flushed_up_to);
 
      // consumer methods
-    void pop(std::vector<LogElement>& output, uint64_t& to_flush_end, bool& should_prepare_tables_anyway, bool& exit_this_thread);
-    void confirm(uint64_t to_flush_end);
+    Index pop(std::vector<LogElement>& output, bool& should_prepare_tables_anyway, bool& exit_this_thread);
+    void confirm(Index to_flush_end);
 
     /// Data shared between callers of add()/flush()/shutdown(), and the saving thread
     std::mutex mutex;
@@ -102,19 +105,19 @@ private:
     // We use it to give a global sequential index to every message, so that we
     // can wait until a particular message is flushed. This is used to implement
     // synchronous log flushing for SYSTEM FLUSH LOGS.
-    uint64_t queue_front_index = 0;
+    Index queue_front_index = 0;
     // A flag that says we must create the tables even if the queue is empty.
     bool is_force_prepare_tables = false;
     // Requested to flush logs up to this index, exclusive
-    uint64_t requested_flush_up_to = 0;
-    std::condition_variable flush_event;
+    Index requested_flush_up_to = 0;
     // Flushed log up to this index, exclusive
-    uint64_t flushed_up_to = 0;
+    Index flushed_up_to = 0;
     // Logged overflow message at this queue front index
-    uint64_t logged_queue_full_at_index = -1;
-
+    Index logged_queue_full_at_index = -1;
+    
     bool is_shutdown = false;
 
+    std::condition_variable flush_event;
     const size_t flush_interval_milliseconds;
 };
 
diff --git a/src/Interpreters/SystemLog.cpp b/src/Interpreters/SystemLog.cpp
index cb4578689a2..3193baa551f 100644
--- a/src/Interpreters/SystemLog.cpp
+++ b/src/Interpreters/SystemLog.cpp
@@ -398,7 +398,7 @@ void SystemLog<LogElement>::savingThreadFunction()
             // Should we prepare table even if there are no new messages.
             bool should_prepare_tables_anyway = false;
 
-            queue->pop(to_flush, to_flush_end, should_prepare_tables_anyway, exit_this_thread);
+            to_flush_end = queue->pop(to_flush, should_prepare_tables_anyway, exit_this_thread);
 
             if (to_flush.empty())
             {
@@ -621,7 +621,7 @@ ASTPtr SystemLog<LogElement>::getCreateTableQuery()
 template <typename LogElement>
 void SystemLog<LogElement>::add(const LogElement & element)
 {
-    queue->add(element);
+    queue->push(element);
 }
 
 template <typename LogElement>
diff --git a/src/Loggers/OwnSplitChannel.cpp b/src/Loggers/OwnSplitChannel.cpp
index 17806153905..b5ac42d6041 100644
--- a/src/Loggers/OwnSplitChannel.cpp
+++ b/src/Loggers/OwnSplitChannel.cpp
@@ -138,7 +138,7 @@ void OwnSplitChannel::logSplit(const Poco::Message & msg)
         std::shared_ptr<SystemLogQueue<TextLogElement>> text_log_locked{};
         text_log_locked = text_log.lock();
         if (text_log_locked)
-            text_log_locked->add(elem);
+            text_log_locked->push(elem);
     }
 #endif
 }

From 6ae5207819b84dfd63c92cbd848995eaba6586f2 Mon Sep 17 00:00:00 2001
From: Song Liyong <awesomeleo@163.com>
Date: Thu, 13 Jul 2023 13:27:23 +0200
Subject: [PATCH 106/242] MaterializedMySQL: Introduce charset conversion

---
 src/Core/MySQL/IMySQLReadPacket.cpp           |  12 +-
 src/Core/MySQL/IMySQLReadPacket.h             |   1 +
 src/Core/MySQL/MySQLCharset.cpp               | 301 +++++++++++++++
 src/Core/MySQL/MySQLCharset.h                 |  26 ++
 src/Core/MySQL/MySQLReplication.cpp           | 143 ++++++-
 src/Core/MySQL/MySQLReplication.h             |  61 ++-
 src/Core/tests/gtest_charset_conv.cpp         | 351 ++++++++++++++++++
 .../materialized_with_ddl.py                  |  83 +++++
 .../test_materialized_mysql_database/test.py  |   6 +
 utils/check-mysql-binlog/main.cpp             |   4 +-
 10 files changed, 979 insertions(+), 9 deletions(-)
 create mode 100644 src/Core/MySQL/MySQLCharset.cpp
 create mode 100644 src/Core/MySQL/MySQLCharset.h
 create mode 100644 src/Core/tests/gtest_charset_conv.cpp

diff --git a/src/Core/MySQL/IMySQLReadPacket.cpp b/src/Core/MySQL/IMySQLReadPacket.cpp
index 39b2e5bbfb5..bb00444c6b3 100644
--- a/src/Core/MySQL/IMySQLReadPacket.cpp
+++ b/src/Core/MySQL/IMySQLReadPacket.cpp
@@ -43,11 +43,12 @@ void LimitedReadPacket::readPayloadWithUnpacked(ReadBuffer & in)
     IMySQLReadPacket::readPayloadWithUnpacked(limited);
 }
 
-uint64_t readLengthEncodedNumber(ReadBuffer & buffer)
+uint64_t readLengthEncodedNumber(ReadBuffer & buffer, UInt16 & bytes_read)
 {
     char c{};
     uint64_t buf = 0;
     buffer.readStrict(c);
+    bytes_read = 1;
     auto cc = static_cast<uint8_t>(c);
     switch (cc)
     {
@@ -56,12 +57,15 @@ uint64_t readLengthEncodedNumber(ReadBuffer & buffer)
             break;
         case 0xfc:
             buffer.readStrict(reinterpret_cast<char *>(&buf), 2);
+            bytes_read += 2;
             break;
         case 0xfd:
             buffer.readStrict(reinterpret_cast<char *>(&buf), 3);
+            bytes_read += 3;
             break;
         case 0xfe:
             buffer.readStrict(reinterpret_cast<char *>(&buf), 8);
+            bytes_read += 8;
             break;
         default:
             return cc;
@@ -69,6 +73,12 @@ uint64_t readLengthEncodedNumber(ReadBuffer & buffer)
     return buf;
 }
 
+uint64_t readLengthEncodedNumber(ReadBuffer & buffer)
+{
+    UInt16 bytes_read = 0;
+    return readLengthEncodedNumber(buffer, bytes_read);
+}
+
 void readLengthEncodedString(String & s, ReadBuffer & buffer)
 {
     uint64_t len = readLengthEncodedNumber(buffer);
diff --git a/src/Core/MySQL/IMySQLReadPacket.h b/src/Core/MySQL/IMySQLReadPacket.h
index eab31889091..b6c3d59f5ee 100644
--- a/src/Core/MySQL/IMySQLReadPacket.h
+++ b/src/Core/MySQL/IMySQLReadPacket.h
@@ -34,6 +34,7 @@ public:
 };
 
 uint64_t readLengthEncodedNumber(ReadBuffer & buffer);
+uint64_t readLengthEncodedNumber(ReadBuffer & buffer, UInt16 & bytes_read);
 void readLengthEncodedString(String & s, ReadBuffer & buffer);
 
 }
diff --git a/src/Core/MySQL/MySQLCharset.cpp b/src/Core/MySQL/MySQLCharset.cpp
new file mode 100644
index 00000000000..869941ebd84
--- /dev/null
+++ b/src/Core/MySQL/MySQLCharset.cpp
@@ -0,0 +1,301 @@
+#include "MySQLCharset.h"
+#include "config.h"
+#include <iostream>
+#include <Common/Exception.h>
+
+#if USE_ICU
+#include <unicode/ucnv.h>
+#define CHUNK_SIZE 1024
+static const char * TARGET_CHARSET = "utf8";
+#endif
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+    extern const int UNKNOWN_EXCEPTION;
+}
+
+const std::unordered_map<Int32, String> MySQLCharset::charsets
+    = {
+          {1, "big5"},
+          {2, "latin2"},
+          {3, "dec8"},
+          {4, "cp850"},
+          {5, "latin1"},
+          {6, "hp8"},
+          {7, "koi8r"},
+          {8, "latin1"},
+          {9, "latin2"},
+          {10, "swe7"},
+          {11, "ascii"},
+          {12, "ujis"},
+          {13, "sjis"},
+          {14, "cp1251"},
+          {15, "latin1"},
+          {16, "hebrew"},
+          {18, "tis620"},
+          {19, "euckr"},
+          {20, "latin7"},
+          {21, "latin2"},
+          {22, "koi8u"},
+          {23, "cp1251"},
+          {24, "gb2312"},
+          {25, "greek"},
+          {26, "cp1250"},
+          {27, "latin2"},
+          {28, "gbk"},
+          {29, "cp1257"},
+          {30, "latin5"},
+          {31, "latin1"},
+          {32, "armscii8"},
+          {34, "cp1250"},
+          {35, "ucs2"},
+          {36, "cp866"},
+          {37, "keybcs2"},
+          {38, "macce"},
+          {39, "macroman"},
+          {40, "cp852"},
+          {41, "latin7"},
+          {42, "latin7"},
+          {43, "macce"},
+          {44, "cp1250"},
+          {47, "latin1"},
+          {48, "latin1"},
+          {49, "latin1"},
+          {50, "cp1251"},
+          {51, "cp1251"},
+          {52, "cp1251"},
+          {53, "macroman"},
+          {54, "utf16"},
+          {55, "utf16"},
+          {56, "utf16le"},
+          {57, "cp1256"},
+          {58, "cp1257"},
+          {59, "cp1257"},
+          {60, "utf32"},
+          {61, "utf32"},
+          {62, "utf16le"},
+          {64, "armscii8"},
+          {65, "ascii"},
+          {66, "cp1250"},
+          {67, "cp1256"},
+          {68, "cp866"},
+          {69, "dec8"},
+          {70, "greek"},
+          {71, "hebrew"},
+          {72, "hp8"},
+          {73, "keybcs2"},
+          {74, "koi8r"},
+          {75, "koi8u"},
+          {77, "latin2"},
+          {78, "latin5"},
+          {79, "latin7"},
+          {80, "cp850"},
+          {81, "cp852"},
+          {82, "swe7"},
+          {84, "big5"},
+          {85, "euckr"},
+          {86, "gb2312"},
+          {87, "gbk"},
+          {88, "sjis"},
+          {89, "tis620"},
+          {90, "ucs2"},
+          {91, "ujis"},
+          {92, "geostd8"},
+          {93, "geostd8"},
+          {94, "latin1"},
+          {95, "cp932"},
+          {96, "cp932"},
+          {97, "eucjpms"},
+          {98, "eucjpms"},
+          {99, "cp1250"},
+          {101, "utf16"},
+          {102, "utf16"},
+          {103, "utf16"},
+          {104, "utf16"},
+          {105, "utf16"},
+          {106, "utf16"},
+          {107, "utf16"},
+          {108, "utf16"},
+          {109, "utf16"},
+          {110, "utf16"},
+          {111, "utf16"},
+          {112, "utf16"},
+          {113, "utf16"},
+          {114, "utf16"},
+          {115, "utf16"},
+          {116, "utf16"},
+          {117, "utf16"},
+          {118, "utf16"},
+          {119, "utf16"},
+          {120, "utf16"},
+          {121, "utf16"},
+          {122, "utf16"},
+          {123, "utf16"},
+          {124, "utf16"},
+          {128, "ucs2"},
+          {129, "ucs2"},
+          {130, "ucs2"},
+          {131, "ucs2"},
+          {132, "ucs2"},
+          {133, "ucs2"},
+          {134, "ucs2"},
+          {135, "ucs2"},
+          {136, "ucs2"},
+          {137, "ucs2"},
+          {138, "ucs2"},
+          {139, "ucs2"},
+          {140, "ucs2"},
+          {141, "ucs2"},
+          {142, "ucs2"},
+          {143, "ucs2"},
+          {144, "ucs2"},
+          {145, "ucs2"},
+          {146, "ucs2"},
+          {147, "ucs2"},
+          {148, "ucs2"},
+          {149, "ucs2"},
+          {150, "ucs2"},
+          {151, "ucs2"},
+          {159, "ucs2"},
+          {160, "utf32"},
+          {161, "utf32"},
+          {162, "utf32"},
+          {163, "utf32"},
+          {164, "utf32"},
+          {165, "utf32"},
+          {166, "utf32"},
+          {167, "utf32"},
+          {168, "utf32"},
+          {169, "utf32"},
+          {170, "utf32"},
+          {171, "utf32"},
+          {172, "utf32"},
+          {173, "utf32"},
+          {174, "utf32"},
+          {175, "utf32"},
+          {176, "utf32"},
+          {177, "utf32"},
+          {178, "utf32"},
+          {179, "utf32"},
+          {180, "utf32"},
+          {181, "utf32"},
+          {182, "utf32"},
+          {183, "utf32"},
+          {248, "gb18030"},
+          {249, "gb18030"},
+          {250, "gb18030"}
+      };
+
+MySQLCharset::~MySQLCharset()
+{
+#if USE_ICU
+    std::lock_guard lock(mutex);
+    for (auto & conv : conv_cache)
+    {
+        ucnv_close(conv.second);
+    }
+    conv_cache.clear();
+#endif
+}
+
+bool MySQLCharset::needConvert(UInt32 id)
+{
+    return charsets.contains(id);
+}
+
+String MySQLCharset::getCharsetFromId(UInt32 id)
+{
+    return charsets.at(id);
+}
+
+UConverter * MySQLCharset::getCachedConverter(const String & charset [[maybe_unused]])
+{
+    UConverter * conv = nullptr;
+#if USE_ICU
+    UErrorCode error = U_ZERO_ERROR;
+    /// Get conv from cache
+    auto result = conv_cache.find(charset);
+    if (result != conv_cache.end())
+    {
+        conv = result->second;
+        /// Reset the cached converter to its initial state before reuse
+        ucnv_reset(conv);
+    }
+    else
+    {
+        conv = ucnv_open(charset.c_str(), &error);
+        if (error != U_ZERO_ERROR)
+        {
+            throw Exception(
+                ErrorCodes::UNKNOWN_EXCEPTION, "MySQLCharset::getCachedConveter: ucnv_open failed, error={}", std::to_string(error));
+        }
+        conv_cache[charset.c_str()] = conv;
+    }
+#endif
+    return conv;
+}
+
+Int32 MySQLCharset::convertFromId(UInt32 id [[maybe_unused]], String & to, const String & from)
+{
+#if USE_ICU
+    std::lock_guard lock(mutex);
+    UErrorCode error = U_ZERO_ERROR;
+    String source_charset = getCharsetFromId(id);
+    to.clear();
+    if (source_charset.empty())
+    {
+        return U_ILLEGAL_ARGUMENT_ERROR;
+    }
+
+    UChar pivot_buf[CHUNK_SIZE]; // stream mode must use this buf
+    char target_buf[CHUNK_SIZE];
+    UChar * pivot;
+    UChar * pivot2;
+    UConverter * in_conv;
+    UConverter * out_conv;
+    char * cur_target;
+    const char * source_end;
+    const char * target_end;
+
+    size_t source_len = from.size();
+    const char * source = from.data();
+    source_end = source + source_len;
+
+    out_conv = getCachedConverter(TARGET_CHARSET);
+    in_conv = getCachedConverter(source_charset);
+    pivot = pivot_buf;
+    pivot2 = pivot_buf;
+
+    target_end = target_buf + CHUNK_SIZE;
+    do
+    {
+        error = U_ZERO_ERROR;
+        cur_target = target_buf;
+        ucnv_convertEx(
+            out_conv,
+            in_conv,
+            &cur_target,
+            target_end,
+            &source,
+            source_end,
+            pivot_buf,
+            &pivot,
+            &pivot2,
+            pivot_buf + CHUNK_SIZE,
+            false,
+            true,
+            &error);
+        to.append(target_buf, cur_target - target_buf);
+    } while (error == U_BUFFER_OVERFLOW_ERROR);
+
+    return error;
+#else
+    to = from;
+    return 0;
+#endif
+}
+
+}
diff --git a/src/Core/MySQL/MySQLCharset.h b/src/Core/MySQL/MySQLCharset.h
new file mode 100644
index 00000000000..4371a2853ed
--- /dev/null
+++ b/src/Core/MySQL/MySQLCharset.h
@@ -0,0 +1,26 @@
+#pragma once
+#include <unordered_map>
+#include <base/types.h>
+#include <boost/noncopyable.hpp>
+#include <mutex>
+
+struct UConverter;
+
+namespace DB
+{
+class MySQLCharset final : boost::noncopyable
+{
+public:
+    ~MySQLCharset();
+    String getCharsetFromId(UInt32 id);
+    Int32 convertFromId(UInt32 id, String & to, const String & from);
+    bool needConvert(UInt32 id);
+private:
+    std::mutex mutex;
+    std::unordered_map<String, UConverter *> conv_cache;
+    UConverter * getCachedConverter(const String & charset);
+    static const std::unordered_map<Int32, String> charsets;
+};
+
+using MySQLCharsetPtr = std::shared_ptr<MySQLCharset>;
+}
diff --git a/src/Core/MySQL/MySQLReplication.cpp b/src/Core/MySQL/MySQLReplication.cpp
index 1ee027b7185..1b0f36714f1 100644
--- a/src/Core/MySQL/MySQLReplication.cpp
+++ b/src/Core/MySQL/MySQLReplication.cpp
@@ -176,9 +176,9 @@ namespace MySQLReplication
         size_t null_bitmap_size = (column_count + 7) / 8;
         readBitmap(payload, null_bitmap, null_bitmap_size);
 
-        /// Ignore MySQL 8.0 optional metadata fields.
+        /// Parse MySQL 8.0 optional metadata fields.
         /// https://mysqlhighavailability.com/more-metadata-is-written-into-binary-log/
-        payload.ignoreAll();
+        parseOptionalMetaField(payload);
     }
 
     /// Types that do not used in the binlog event:
@@ -252,6 +252,118 @@ namespace MySQLReplication
         }
     }
 
+    void TableMapEvent::parseOptionalMetaField(ReadBuffer & payload)
+    {
+        char type = 0;
+        while (payload.read(type))
+        {
+            UInt64 len = readLengthEncodedNumber(payload);
+            if (len == 0)
+            {
+                payload.ignoreAll();
+                return;
+            }
+            switch (type)
+            {
+                /// Not parsed yet; may be useful later
+                case SIGNEDNESS:
+                    payload.ignore(len);
+                    break;
+                case DEFAULT_CHARSET:
+                {
+                    UInt32 total_read = 0;
+                    UInt16 once_read = 0;
+                    default_charset = static_cast<UInt32>(readLengthEncodedNumber(payload, once_read));
+                    total_read += once_read;
+                    while (total_read < len)
+                    {
+                        UInt32 col_index = static_cast<UInt32>(readLengthEncodedNumber(payload, once_read));
+                        total_read += once_read;
+                        UInt32 col_charset = static_cast<UInt32>(readLengthEncodedNumber(payload, once_read));
+                        total_read += once_read;
+                        default_charset_pairs.emplace(col_index, col_charset);
+                    }
+                    break;
+                }
+                case COLUMN_CHARSET:
+                {
+                    UInt32 total_read = 0;
+                    UInt16 once_read = 0;
+                    while (total_read < len)
+                    {
+                        UInt32 collation_id = static_cast<UInt32>(readLengthEncodedNumber(payload, once_read));
+                        column_charset.emplace_back(collation_id);
+                        total_read += once_read;
+                    }
+                    break;
+                }
+                case COLUMN_NAME:
+                    payload.ignore(len);
+                    break;
+                case SET_STR_VALUE:
+                case GEOMETRY_TYPE:
+                case SIMPLE_PRIMARY_KEY:
+                case PRIMARY_KEY_WITH_PREFIX:
+                case ENUM_AND_SET_DEFAULT_CHARSET:
+                case COLUMN_VISIBILITY:
+                default:
+                    payload.ignore(len);
+                    break;
+            }
+        }
+    }
+
+    UInt32 TableMapEvent::getColumnCharsetId(UInt32 column_index)
+    {
+        if (!column_charset.empty())
+        {
+            UInt32 str_index = 0xFFFFFFFF;
+            /// Count the string columns up to column_index to get its index in column_charset
+            for (UInt32 i = 0; i <= column_index; ++i)
+            {
+                switch (column_type[i])
+                {
+                    case MYSQL_TYPE_STRING:
+                    case MYSQL_TYPE_VAR_STRING:
+                    case MYSQL_TYPE_VARCHAR:
+                    case MYSQL_TYPE_BLOB:
+                        ++str_index;
+                        break;
+                    default:
+                        break;
+                }
+            }
+
+            if (str_index != 0xFFFFFFFF && str_index < column_charset.size())
+            {
+                return column_charset[str_index];
+            }
+        }
+        else if (!default_charset_pairs.empty())
+        {
+            UInt32 str_index = 0xFFFFFFFF;
+            for (UInt32 i = 0; i <= column_index; ++i)
+            {
+                switch (column_type[i])
+                {
+                    case MYSQL_TYPE_STRING:
+                    case MYSQL_TYPE_VAR_STRING:
+                    case MYSQL_TYPE_VARCHAR:
+                    case MYSQL_TYPE_BLOB:
+                        ++str_index;
+                        break;
+                    default:
+                        break;
+                }
+            }
+            if (default_charset_pairs.contains(str_index))
+            {
+                return default_charset_pairs[str_index];
+            }
+        }
+        return default_charset;
+    }
+
     void TableMapEvent::dump(WriteBuffer & out) const
     {
         header.dump(out);
@@ -308,6 +420,22 @@ namespace MySQLReplication
         }
     }
 
+    static inline String convertCharsetIfNeeded(
+        const std::shared_ptr<TableMapEvent> & table_map,
+        UInt32 i,
+        const String & val)
+    {
+        const auto collation_id = table_map->getColumnCharsetId(i);
+        if (table_map->charset_ptr->needConvert(collation_id))
+        {
+            String target;
+            auto err = table_map->charset_ptr->convertFromId(collation_id, target, val);
+            if (err == 0)
+                return target;
+        }
+        return val;
+    }
+
     /// Types that do not used in the binlog event:
     /// MYSQL_TYPE_SET
     /// MYSQL_TYPE_TINY_BLOB
@@ -716,7 +844,7 @@ namespace MySQLReplication
                         String val;
                         val.resize(size);
                         payload.readStrict(reinterpret_cast<char *>(val.data()), size);
-                        row.push_back(Field{String{val}});
+                        row.emplace_back(Field{convertCharsetIfNeeded(table_map, i, val)});
                         break;
                     }
                     case MYSQL_TYPE_STRING:
@@ -734,7 +862,7 @@ namespace MySQLReplication
                         String val;
                         val.resize(size);
                         payload.readStrict(reinterpret_cast<char *>(val.data()), size);
-                        row.push_back(Field{String{val}});
+                        row.emplace_back(Field{convertCharsetIfNeeded(table_map, i, val)});
                         break;
                     }
                     case MYSQL_TYPE_GEOMETRY:
@@ -766,7 +894,10 @@ namespace MySQLReplication
                         String val;
                         val.resize(size);
                         payload.readStrict(reinterpret_cast<char *>(val.data()), size);
-                        row.push_back(Field{String{val}});
+                        row.emplace_back(Field{
+                            field_type == MYSQL_TYPE_BLOB
+                            ? convertCharsetIfNeeded(table_map, i, val)
+                            : val});
                         break;
                     }
                     default:
@@ -966,7 +1097,7 @@ namespace MySQLReplication
                 map_event_header.parse(event_payload);
                 if (doReplicate(map_event_header.schema, map_event_header.table))
                 {
-                    event = std::make_shared<TableMapEvent>(std::move(event_header), map_event_header);
+                    event = std::make_shared<TableMapEvent>(std::move(event_header), map_event_header, flavor_charset);
                     event->parseEvent(event_payload);
                     auto table_map = std::static_pointer_cast<TableMapEvent>(event);
                     table_maps[table_map->table_id] = table_map;
diff --git a/src/Core/MySQL/MySQLReplication.h b/src/Core/MySQL/MySQLReplication.h
index 5825924d10b..190a2e8484d 100644
--- a/src/Core/MySQL/MySQLReplication.h
+++ b/src/Core/MySQL/MySQLReplication.h
@@ -2,6 +2,7 @@
 #include <Core/Field.h>
 #include <Core/MySQL/PacketsReplication.h>
 #include <Core/MySQL/MySQLGtid.h>
+#include <Core/MySQL/MySQLCharset.h>
 #include <base/types.h>
 #include <IO/ReadBuffer.h>
 #include <IO/WriteBuffer.h>
@@ -436,9 +437,24 @@ namespace MySQLReplication
         UInt32 column_count;
         std::vector<UInt8> column_type;
         std::vector<UInt16> column_meta;
+        /// Character set of string columns
+        std::vector<UInt32> column_charset;
+        /// Character set of string columns,
+        /// optimized to minimize space when many
+        /// columns have the same charset
+        UInt32 default_charset = 255; /// utf8mb4_0900_ai_ci
+        std::unordered_map<UInt32, UInt32> default_charset_pairs;
+        /// Points to flavor_charset object
+        MySQLCharsetPtr charset_ptr;
         Bitmap null_bitmap;
 
-        TableMapEvent(EventHeader && header_, const TableMapEventHeader & map_event_header) : EventBase(std::move(header_)), column_count(0)
+        TableMapEvent(
+            EventHeader && header_,
+            const TableMapEventHeader & map_event_header,
+            const MySQLCharsetPtr & charset_ptr_)
+            : EventBase(std::move(header_))
+            , column_count(0)
+            , charset_ptr(charset_ptr_)
         {
             table_id = map_event_header.table_id;
             flags = map_event_header.flags;
@@ -448,10 +464,52 @@ namespace MySQLReplication
             table = map_event_header.table;
         }
         void dump(WriteBuffer & out) const override;
+        UInt32 getColumnCharsetId(UInt32 column_index);
+        /// https://mysqlhighavailability.com/more-metadata-is-written-into-binary-log/
+        /// https://github.com/mysql/mysql-server/blob/8.0/libbinlogevents/include/rows_event.h#L50
+        /// DEFAULT_CHARSET and COLUMN_CHARSET don't appear together, and
+        /// ENUM_AND_SET_DEFAULT_CHARSET and ENUM_AND_SET_COLUMN_CHARSET don't appear together.
+        enum OptionalMetaType : char
+        {
+            /// UNSIGNED flag of numeric columns
+            SIGNEDNESS = 1,
+            /// Character set of string columns, optimized to
+            /// minimize space when many columns have the
+            /// same charset
+            DEFAULT_CHARSET,
+            /// Character set of string columns, optimized to
+            /// minimize space when columns have many
+            /// different charsets
+            COLUMN_CHARSET,
+            COLUMN_NAME,
+            /// String value of SET columns
+            SET_STR_VALUE,
+            /// String value of ENUM columns
+            ENUM_STR_VALUE,
+            /// Real type of geometry columns
+            GEOMETRY_TYPE,
+            /// Primary key without prefix
+            SIMPLE_PRIMARY_KEY,
+            /// Primary key with prefix
+            PRIMARY_KEY_WITH_PREFIX,
+            /// Character set of enum and set
+            /// columns, optimized to minimize
+            /// space when many columns have the
+            /// same charset
+            ENUM_AND_SET_DEFAULT_CHARSET,
+            /// Character set of enum and set
+            /// columns, optimized to minimize
+            /// space when columns have many
+            /// different charsets
+            ENUM_AND_SET_COLUMN_CHARSET,
+            /// Flag to indicate column visibility attribute
+            COLUMN_VISIBILITY
+        };
 
     protected:
         void parseImpl(ReadBuffer & payload) override;
         void parseMeta(String meta);
+        void parseOptionalMetaField(ReadBuffer & payload);
     };
 
     enum RowsEventFlags
@@ -598,6 +656,7 @@ namespace MySQLReplication
         std::unordered_set<String> replicate_tables;
         std::map<UInt64, std::shared_ptr<TableMapEvent> > table_maps;
         size_t checksum_signature_length = 4;
+        MySQLCharsetPtr flavor_charset = std::make_shared<MySQLCharset>();
 
         bool doReplicate(UInt64 table_id);
         bool doReplicate(const String & db, const String & table_name);
diff --git a/src/Core/tests/gtest_charset_conv.cpp b/src/Core/tests/gtest_charset_conv.cpp
new file mode 100644
index 00000000000..073b0dd74b4
--- /dev/null
+++ b/src/Core/tests/gtest_charset_conv.cpp
@@ -0,0 +1,351 @@
+#include <Core/MySQL/MySQLCharset.h>
+#include <gtest/gtest.h>
+#include <cstdio>
+
+namespace DB
+{
+
+struct CheckResult /// Expected outcome for one collation id in the FullCharsetCheck table.
+{
+    Int32 id; /// Collation id handed to needConvert / getCharsetFromId.
+    String name; /// Charset name expected from getCharsetFromId (only compared when conversion is needed).
+    bool need_convert; /// Expected result of needConvert for this id.
+};
+
+TEST(CharsetTest, CharsetTest) // Spot-checks needConvert and getCharsetFromId on a few collation ids.
+{
+    MySQLCharset charset;
+    UInt32 big5_id = 1; // big5_chinese_ci
+    UInt32 gbk_id = 28; // gbk_chinese_ci
+    UInt32 gb2312_id = 24; // gb2312_chinese_ci
+    UInt32 utf8mb4_ai_ci_id = 255; // utf8mb4_0900_ai_ci
+    EXPECT_TRUE(charset.needConvert(big5_id));
+    EXPECT_TRUE(charset.needConvert(gbk_id));
+    EXPECT_TRUE(charset.needConvert(gb2312_id));
+    EXPECT_FALSE(charset.needConvert(utf8mb4_ai_ci_id));
+    EXPECT_FALSE(charset.needConvert(0)); // id absent from the FullCharsetCheck table
+    EXPECT_FALSE(charset.needConvert(1000)); // id absent from the FullCharsetCheck table
+
+    EXPECT_EQ(charset.getCharsetFromId(big5_id), String("big5"));
+    EXPECT_EQ(charset.getCharsetFromId(gbk_id), String("gbk"));
+    EXPECT_EQ(charset.getCharsetFromId(gb2312_id), String("gb2312"));
+}
+
+TEST(CharsetTest, ConvTest) // Converts the same two-character text from gbk, gb2312 and big5 and compares against one expected byte string.
+{
+    MySQLCharset charset;
+    UInt32 big5_id = 1;
+    UInt32 gbk_id = 28;
+    UInt32 gb2312_id = 24;
+    Int32 error = 0;
+    String source("\xc4\xe3\xba\xc3"); // "你好" encoded as gbk
+    String target;
+    String expect("\xe4\xbd\xa0\xe5\xa5\xbd"); // "你好" encoded as UTF-8
+
+    error = charset.convertFromId(gbk_id, target, source);
+    EXPECT_EQ(error, 0);
+    EXPECT_TRUE(target == expect);
+
+    error = charset.convertFromId(gb2312_id, target, source); // same source bytes, this time declared as gb2312
+    EXPECT_EQ(error, 0);
+    EXPECT_TRUE(target == expect);
+
+    source.assign("\xa7\x41\xa6\x6e"); // "你好" encoded as big5
+    error = charset.convertFromId(big5_id, target, source);
+    EXPECT_EQ(error, 0);
+    EXPECT_TRUE(target == expect);
+}
+
+TEST(CharsetTest, FullCharsetCheck) // Table-driven check: {collation id, charset name, needs conversion}; trailing comment is the collation name.
+{
+    CheckResult result[] =
+    {
+        {1, "big5", true}, // "big5_chinese_ci",
+        {2, "latin2", true}, // "latin2_czech_cs",
+        {3, "dec8", true}, // "dec8_swedish_ci",
+        {4, "cp850", true}, // "cp850_general_ci",
+        {5, "latin1", true}, // "latin1_german1_ci",
+        {6, "hp8", true}, // "hp8_english_ci",
+        {7, "koi8r", true}, // "koi8r_general_ci",
+        {8, "latin1", true}, // "latin1_swedish_ci",
+        {9, "latin2", true}, // "latin2_general_ci",
+        {10, "swe7", true}, // "swe7_swedish_ci",
+        {11, "ascii", true}, // "ascii_general_ci",
+        {12, "ujis", true}, // "ujis_japanese_ci",
+        {13, "sjis", true}, // "sjis_japanese_ci",
+        {14, "cp1251", true}, // "cp1251_bulgarian_ci",
+        {15, "latin1", true}, // "latin1_danish_ci",
+        {16, "hebrew", true}, // "hebrew_general_ci",
+        {18, "tis620", true}, // "tis620_thai_ci",
+        {19, "euckr", true}, // "euckr_korean_ci",
+        {20, "latin7", true}, // "latin7_estonian_cs",
+        {21, "latin2", true}, // "latin2_hungarian_ci",
+        {22, "koi8u", true}, // "koi8u_general_ci",
+        {23, "cp1251", true}, // "cp1251_ukrainian_ci",
+        {24, "gb2312", true}, // "gb2312_chinese_ci",
+        {25, "greek", true}, // "greek_general_ci",
+        {26, "cp1250", true}, // "cp1250_general_ci",
+        {27, "latin2", true}, // "latin2_croatian_ci",
+        {28, "gbk", true}, // "gbk_chinese_ci",
+        {29, "cp1257", true}, // "cp1257_lithuanian_ci",
+        {30, "latin5", true}, // "latin5_turkish_ci",
+        {31, "latin1", true}, // "latin1_german2_ci",
+        {32, "armscii8", true}, // "armscii8_general_ci",
+        {33, "utf8", false}, // "utf8_general_ci",
+        {34, "cp1250", true}, // "cp1250_czech_cs",
+        {35, "ucs2", true}, // "ucs2_general_ci",
+        {36, "cp866", true}, // "cp866_general_ci",
+        {37, "keybcs2", true}, // "keybcs2_general_ci",
+        {38, "macce", true}, // "macce_general_ci",
+        {39, "macroman", true}, // "macroman_general_ci",
+        {40, "cp852", true}, // "cp852_general_ci",
+        {41, "latin7", true}, // "latin7_general_ci",
+        {42, "latin7", true}, // "latin7_general_cs",
+        {43, "macce", true}, // "macce_bin",
+        {44, "cp1250", true}, // "cp1250_croatian_ci",
+        {45, "utf8mb4", false}, // "utf8mb4_general_ci",
+        {46, "utf8mb4", false}, // "utf8mb4_bin",
+        {47, "latin1", true}, // "latin1_bin",
+        {48, "latin1", true}, // "latin1_general_ci",
+        {49, "latin1", true}, // "latin1_general_cs",
+        {50, "cp1251", true}, // "cp1251_bin",
+        {51, "cp1251", true}, // "cp1251_general_ci",
+        {52, "cp1251", true}, // "cp1251_general_cs",
+        {53, "macroman", true}, // "macroman_bin",
+        {54, "utf16", true}, // "utf16_general_ci",
+        {55, "utf16", true}, // "utf16_bin",
+        {56, "utf16le", true}, // "utf16le_general_ci",
+        {57, "cp1256", true}, // "cp1256_general_ci",
+        {58, "cp1257", true}, // "cp1257_bin",
+        {59, "cp1257", true}, // "cp1257_general_ci",
+        {60, "utf32", true}, // "utf32_general_ci",
+        {61, "utf32", true}, // "utf32_bin",
+        {62, "utf16le", true}, // "utf16le_bin",
+        {64, "armscii8", true}, // "armscii8_bin",
+        {65, "ascii", true}, // "ascii_bin",
+        {66, "cp1250", true}, // "cp1250_bin",
+        {67, "cp1256", true}, // "cp1256_bin",
+        {68, "cp866", true}, // "cp866_bin",
+        {69, "dec8", true}, // "dec8_bin",
+        {70, "greek", true}, // "greek_bin",
+        {71, "hebrew", true}, // "hebrew_bin",
+        {72, "hp8", true}, // "hp8_bin",
+        {73, "keybcs2", true}, // "keybcs2_bin",
+        {74, "koi8r", true}, // "koi8r_bin",
+        {75, "koi8u", true}, // "koi8u_bin",
+        {77, "latin2", true}, // "latin2_bin",
+        {78, "latin5", true}, // "latin5_bin",
+        {79, "latin7", true}, // "latin7_bin",
+        {80, "cp850", true}, // "cp850_bin",
+        {81, "cp852", true}, // "cp852_bin",
+        {82, "swe7", true}, // "swe7_bin",
+        {83, "utf8", false}, // "utf8_bin",
+        {84, "big5", true}, // "big5_bin",
+        {85, "euckr", true}, // "euckr_bin",
+        {86, "gb2312", true}, // "gb2312_bin",
+        {87, "gbk", true}, // "gbk_bin",
+        {88, "sjis", true}, // "sjis_bin",
+        {89, "tis620", true}, // "tis620_bin",
+        {90, "ucs2", true}, // "ucs2_bin",
+        {91, "ujis", true}, // "ujis_bin",
+        {92, "geostd8", true}, // "geostd8_general_ci",
+        {93, "geostd8", true}, // "geostd8_bin",
+        {94, "latin1", true}, // "latin1_spanish_ci",
+        {95, "cp932", true}, // "cp932_japanese_ci",
+        {96, "cp932", true}, // "cp932_bin",
+        {97, "eucjpms", true}, // "eucjpms_japanese_ci",
+        {98, "eucjpms", true}, // "eucjpms_bin",
+        {99, "cp1250", true}, // "cp1250_polish_ci",
+        {101, "utf16", true}, // "utf16_unicode_ci",
+        {102, "utf16", true}, // "utf16_icelandic_ci",
+        {103, "utf16", true}, // "utf16_latvian_ci",
+        {104, "utf16", true}, // "utf16_romanian_ci",
+        {105, "utf16", true}, // "utf16_slovenian_ci",
+        {106, "utf16", true}, // "utf16_polish_ci",
+        {107, "utf16", true}, // "utf16_estonian_ci",
+        {108, "utf16", true}, // "utf16_spanish_ci",
+        {109, "utf16", true}, // "utf16_swedish_ci",
+        {110, "utf16", true}, // "utf16_turkish_ci",
+        {111, "utf16", true}, // "utf16_czech_ci",
+        {112, "utf16", true}, // "utf16_danish_ci",
+        {113, "utf16", true}, // "utf16_lithuanian_ci",
+        {114, "utf16", true}, // "utf16_slovak_ci",
+        {115, "utf16", true}, // "utf16_spanish2_ci",
+        {116, "utf16", true}, // "utf16_roman_ci",
+        {117, "utf16", true}, // "utf16_persian_ci",
+        {118, "utf16", true}, // "utf16_esperanto_ci",
+        {119, "utf16", true}, // "utf16_hungarian_ci",
+        {120, "utf16", true}, // "utf16_sinhala_ci",
+        {121, "utf16", true}, // "utf16_german2_ci",
+        {122, "utf16", true}, // "utf16_croatian_ci",
+        {123, "utf16", true}, // "utf16_unicode_520_ci",
+        {124, "utf16", true}, // "utf16_vietnamese_ci",
+        {128, "ucs2", true}, // "ucs2_unicode_ci",
+        {129, "ucs2", true}, // "ucs2_icelandic_ci",
+        {130, "ucs2", true}, // "ucs2_latvian_ci",
+        {131, "ucs2", true}, // "ucs2_romanian_ci",
+        {132, "ucs2", true}, // "ucs2_slovenian_ci",
+        {133, "ucs2", true}, // "ucs2_polish_ci",
+        {134, "ucs2", true}, // "ucs2_estonian_ci",
+        {135, "ucs2", true}, // "ucs2_spanish_ci",
+        {136, "ucs2", true}, // "ucs2_swedish_ci",
+        {137, "ucs2", true}, // "ucs2_turkish_ci",
+        {138, "ucs2", true}, // "ucs2_czech_ci",
+        {139, "ucs2", true}, // "ucs2_danish_ci",
+        {140, "ucs2", true}, // "ucs2_lithuanian_ci",
+        {141, "ucs2", true}, // "ucs2_slovak_ci",
+        {142, "ucs2", true}, // "ucs2_spanish2_ci",
+        {143, "ucs2", true}, // "ucs2_roman_ci",
+        {144, "ucs2", true}, // "ucs2_persian_ci",
+        {145, "ucs2", true}, // "ucs2_esperanto_ci",
+        {146, "ucs2", true}, // "ucs2_hungarian_ci",
+        {147, "ucs2", true}, // "ucs2_sinhala_ci",
+        {148, "ucs2", true}, // "ucs2_german2_ci",
+        {149, "ucs2", true}, // "ucs2_croatian_ci",
+        {150, "ucs2", true}, // "ucs2_unicode_520_ci",
+        {151, "ucs2", true}, // "ucs2_vietnamese_ci",
+        {159, "ucs2", true}, // "ucs2_general_mysql500_ci",
+        {160, "utf32", true}, // "utf32_unicode_ci",
+        {161, "utf32", true}, // "utf32_icelandic_ci",
+        {162, "utf32", true}, // "utf32_latvian_ci",
+        {163, "utf32", true}, // "utf32_romanian_ci",
+        {164, "utf32", true}, // "utf32_slovenian_ci",
+        {165, "utf32", true}, // "utf32_polish_ci",
+        {166, "utf32", true}, // "utf32_estonian_ci",
+        {167, "utf32", true}, // "utf32_spanish_ci",
+        {168, "utf32", true}, // "utf32_swedish_ci",
+        {169, "utf32", true}, // "utf32_turkish_ci",
+        {170, "utf32", true}, // "utf32_czech_ci",
+        {171, "utf32", true}, // "utf32_danish_ci",
+        {172, "utf32", true}, // "utf32_lithuanian_ci",
+        {173, "utf32", true}, // "utf32_slovak_ci",
+        {174, "utf32", true}, // "utf32_spanish2_ci",
+        {175, "utf32", true}, // "utf32_roman_ci",
+        {176, "utf32", true}, // "utf32_persian_ci",
+        {177, "utf32", true}, // "utf32_esperanto_ci",
+        {178, "utf32", true}, // "utf32_hungarian_ci",
+        {179, "utf32", true}, // "utf32_sinhala_ci",
+        {180, "utf32", true}, // "utf32_german2_ci",
+        {181, "utf32", true}, // "utf32_croatian_ci",
+        {182, "utf32", true}, // "utf32_unicode_520_ci",
+        {183, "utf32", true}, // "utf32_vietnamese_ci",
+        {192, "utf8", false}, // "utf8_unicode_ci",
+        {193, "utf8", false}, // "utf8_icelandic_ci",
+        {194, "utf8", false}, // "utf8_latvian_ci",
+        {195, "utf8", false}, // "utf8_romanian_ci",
+        {196, "utf8", false}, // "utf8_slovenian_ci",
+        {197, "utf8", false}, // "utf8_polish_ci",
+        {198, "utf8", false}, // "utf8_estonian_ci",
+        {199, "utf8", false}, // "utf8_spanish_ci",
+        {200, "utf8", false}, // "utf8_swedish_ci",
+        {201, "utf8", false}, // "utf8_turkish_ci",
+        {202, "utf8", false}, // "utf8_czech_ci",
+        {203, "utf8", false}, // "utf8_danish_ci",
+        {204, "utf8", false}, // "utf8_lithuanian_ci",
+        {205, "utf8", false}, // "utf8_slovak_ci",
+        {206, "utf8", false}, // "utf8_spanish2_ci",
+        {207, "utf8", false}, // "utf8_roman_ci",
+        {208, "utf8", false}, // "utf8_persian_ci",
+        {209, "utf8", false}, // "utf8_esperanto_ci",
+        {210, "utf8", false}, // "utf8_hungarian_ci",
+        {211, "utf8", false}, // "utf8_sinhala_ci",
+        {212, "utf8", false}, // "utf8_german2_ci",
+        {213, "utf8", false}, // "utf8_croatian_ci",
+        {214, "utf8", false}, // "utf8_unicode_520_ci",
+        {215, "utf8", false}, // "utf8_vietnamese_ci",
+        {223, "utf8", false}, // "utf8_general_mysql500_ci",
+        {224, "utf8mb4", false}, // "utf8mb4_unicode_ci",
+        {225, "utf8mb4", false}, // "utf8mb4_icelandic_ci",
+        {226, "utf8mb4", false}, // "utf8mb4_latvian_ci",
+        {227, "utf8mb4", false}, // "utf8mb4_romanian_ci",
+        {228, "utf8mb4", false}, // "utf8mb4_slovenian_ci",
+        {229, "utf8mb4", false}, // "utf8mb4_polish_ci",
+        {230, "utf8mb4", false}, // "utf8mb4_estonian_ci",
+        {231, "utf8mb4", false}, // "utf8mb4_spanish_ci",
+        {232, "utf8mb4", false}, // "utf8mb4_swedish_ci",
+        {233, "utf8mb4", false}, // "utf8mb4_turkish_ci",
+        {234, "utf8mb4", false}, // "utf8mb4_czech_ci",
+        {235, "utf8mb4", false}, // "utf8mb4_danish_ci",
+        {236, "utf8mb4", false}, // "utf8mb4_lithuanian_ci",
+        {237, "utf8mb4", false}, // "utf8mb4_slovak_ci",
+        {238, "utf8mb4", false}, // "utf8mb4_spanish2_ci",
+        {239, "utf8mb4", false}, // "utf8mb4_roman_ci",
+        {240, "utf8mb4", false}, // "utf8mb4_persian_ci",
+        {241, "utf8mb4", false}, // "utf8mb4_esperanto_ci",
+        {242, "utf8mb4", false}, // "utf8mb4_hungarian_ci",
+        {243, "utf8mb4", false}, // "utf8mb4_sinhala_ci",
+        {244, "utf8mb4", false}, // "utf8mb4_german2_ci",
+        {245, "utf8mb4", false}, // "utf8mb4_croatian_ci",
+        {246, "utf8mb4", false}, // "utf8mb4_unicode_520_ci",
+        {247, "utf8mb4", false}, // "utf8mb4_vietnamese_ci",
+        {248, "gb18030", true}, // "gb18030_chinese_ci",
+        {249, "gb18030", true}, // "gb18030_bin",
+        {250, "gb18030", true}, // "gb18030_unicode_520_ci",
+        {255, "utf8mb4", false}, // "utf8mb4_0900_ai_ci",
+        {256, "utf8mb4", false}, // "utf8mb4_de_pb_0900_ai_ci",
+        {257, "utf8mb4", false}, // "utf8mb4_is_0900_ai_ci",
+        {258, "utf8mb4", false}, // "utf8mb4_lv_0900_ai_ci",
+        {259, "utf8mb4", false}, // "utf8mb4_ro_0900_ai_ci",
+        {260, "utf8mb4", false}, // "utf8mb4_sl_0900_ai_ci",
+        {261, "utf8mb4", false}, // "utf8mb4_pl_0900_ai_ci",
+        {262, "utf8mb4", false}, // "utf8mb4_et_0900_ai_ci",
+        {263, "utf8mb4", false}, // "utf8mb4_es_0900_ai_ci",
+        {264, "utf8mb4", false}, // "utf8mb4_sv_0900_ai_ci",
+        {265, "utf8mb4", false}, // "utf8mb4_tr_0900_ai_ci",
+        {266, "utf8mb4", false}, // "utf8mb4_cs_0900_ai_ci",
+        {267, "utf8mb4", false}, // "utf8mb4_da_0900_ai_ci",
+        {268, "utf8mb4", false}, // "utf8mb4_lt_0900_ai_ci",
+        {269, "utf8mb4", false}, // "utf8mb4_sk_0900_ai_ci",
+        {270, "utf8mb4", false}, // "utf8mb4_es_trad_0900_ai_ci",
+        {271, "utf8mb4", false}, // "utf8mb4_la_0900_ai_ci",
+        {272, "utf8mb4", false}, // "utf8mb4_fa_0900_ai_ci",
+        {273, "utf8mb4", false}, // "utf8mb4_eo_0900_ai_ci",
+        {274, "utf8mb4", false}, // "utf8mb4_hu_0900_ai_ci",
+        {275, "utf8mb4", false}, // "utf8mb4_hr_0900_ai_ci",
+        {276, "utf8mb4", false}, // "utf8mb4_si_0900_ai_ci",
+        {277, "utf8mb4", false}, // "utf8mb4_vi_0900_ai_ci",
+        {278, "utf8mb4", false}, // "utf8mb4_0900_as_cs",
+        {279, "utf8mb4", false}, // "utf8mb4_de_pb_0900_as_cs",
+        {280, "utf8mb4", false}, // "utf8mb4_is_0900_as_cs",
+        {281, "utf8mb4", false}, // "utf8mb4_lv_0900_as_cs",
+        {282, "utf8mb4", false}, // "utf8mb4_ro_0900_as_cs",
+        {283, "utf8mb4", false}, // "utf8mb4_sl_0900_as_cs",
+        {284, "utf8mb4", false}, // "utf8mb4_pl_0900_as_cs",
+        {285, "utf8mb4", false}, // "utf8mb4_et_0900_as_cs",
+        {286, "utf8mb4", false}, // "utf8mb4_es_0900_as_cs",
+        {287, "utf8mb4", false}, // "utf8mb4_sv_0900_as_cs",
+        {288, "utf8mb4", false}, // "utf8mb4_tr_0900_as_cs",
+        {289, "utf8mb4", false}, // "utf8mb4_cs_0900_as_cs",
+        {290, "utf8mb4", false}, // "utf8mb4_da_0900_as_cs"
+        {291, "utf8mb4", false}, // "utf8mb4_lt_0900_as_cs"
+        {292, "utf8mb4", false}, // "utf8mb4_sk_0900_as_cs"
+        {293, "utf8mb4", false}, // "utf8mb4_es_trad_0900_as_cs"
+        {294, "utf8mb4", false}, // "utf8mb4_la_0900_as_cs"
+        {295, "utf8mb4", false}, // "utf8mb4_fa_0900_as_cs"
+        {296, "utf8mb4", false}, // "utf8mb4_eo_0900_as_cs"
+        {297, "utf8mb4", false}, // "utf8mb4_hu_0900_as_cs"
+        {298, "utf8mb4", false}, // "utf8mb4_hr_0900_as_cs"
+        {299, "utf8mb4", false}, // "utf8mb4_si_0900_as_cs"
+        {300, "utf8mb4", false}, // "utf8mb4_vi_0900_as_cs"
+        {303, "utf8mb4", false}, // "utf8mb4_ja_0900_as_cs"
+        {304, "utf8mb4", false}, // "utf8mb4_ja_0900_as_cs_ks"
+        {305, "utf8mb4", false}, // "utf8mb4_0900_as_ci"
+        {306, "utf8mb4", false}, // "utf8mb4_ru_0900_ai_ci"
+        {307, "utf8mb4", false}, // "utf8mb4_ru_0900_as_cs"
+        {308, "utf8mb4", false}, // "utf8mb4_zh_0900_as_cs"
+        {309, "utf8mb4", false} // "utf8mb4_0900_bin"
+    };
+
+    MySQLCharset charset;
+
+    for (auto & item : result)
+    {
+        EXPECT_TRUE(charset.needConvert(item.id) == item.need_convert);
+        if (charset.needConvert(item.id)) // the name is only verified for ids that need conversion
+        {
+            EXPECT_TRUE(charset.getCharsetFromId(item.id) == item.name);
+        }
+    }
+}
+
+}
diff --git a/tests/integration/test_materialized_mysql_database/materialized_with_ddl.py b/tests/integration/test_materialized_mysql_database/materialized_with_ddl.py
index 8cf9e67bf63..bc19101efb8 100644
--- a/tests/integration/test_materialized_mysql_database/materialized_with_ddl.py
+++ b/tests/integration/test_materialized_mysql_database/materialized_with_ddl.py
@@ -980,6 +980,89 @@ def query_event_with_empty_transaction(clickhouse_node, mysql_node, service_name
     mysql_node.query("DROP DATABASE test_database_event")
 
 
+def text_blob_with_charset_test(clickhouse_node, mysql_node, service_name):  # text/blob columns declared with gbk/big5 charsets must read back unchanged from MaterializedMySQL
+    db = "text_blob_with_charset_test"
+    mysql_node.query(f"DROP DATABASE IF EXISTS {db}")
+    clickhouse_node.query(f"DROP DATABASE IF EXISTS {db}")
+    mysql_node.query(f"CREATE DATABASE {db} DEFAULT CHARACTER SET 'utf8'")
+
+    mysql_node.query(
+        f"CREATE TABLE {db}.test_table_1 (a INT NOT NULL PRIMARY KEY, b text CHARACTER SET gbk, c tinytext CHARSET big5, d longtext, e varchar(256), f char(4)) ENGINE = InnoDB DEFAULT CHARSET=gbk"
+    )
+    mysql_node.query(
+        f"CREATE TABLE {db}.test_table_2 (a INT NOT NULL PRIMARY KEY, b blob, c longblob) ENGINE = InnoDB DEFAULT CHARSET=gbk"
+    )
+    mysql_node.query(
+        f"CREATE TABLE {db}.test_table_3 (a INT NOT NULL PRIMARY KEY, b text CHARACTER SET gbk, c tinytext CHARSET gbk, d tinytext CHARSET big5, e varchar(256), f char(4)) ENGINE = InnoDB"
+    )
+
+    mysql_node.query(
+        f"INSERT INTO {db}.test_table_1 VALUES (1, '你好', '世界', '哈罗', '您Hi您', '您Hi您')"
+    )
+    mysql_node.query(
+        f"INSERT INTO {db}.test_table_2 VALUES (1, '你好', 0xFAAA00000000000DDCC)"
+    )
+    mysql_node.query(
+        f"INSERT INTO {db}.test_table_3 VALUES (1, '你好', '世界', 'hello', '您Hi您', '您Hi您')"
+    )
+
+    clickhouse_node.query(
+        f"CREATE DATABASE {db} ENGINE = MaterializedMySQL('{service_name}:3306', '{db}', 'root', 'clickhouse')"
+    )
+    assert db in clickhouse_node.query("SHOW DATABASES")
+
+    # Phase 1: rows with a = 1 were inserted before the MaterializedMySQL database was created (full replication).
+    check_query(
+        clickhouse_node,
+        f"SHOW TABLES FROM {db} FORMAT TSV",
+        "test_table_1\ntest_table_2\ntest_table_3\n",
+    )
+    check_query(
+        clickhouse_node,
+        f"SELECT b, c, d, e, f FROM {db}.test_table_1 WHERE a = 1 FORMAT TSV",
+        "你好\t世界\t哈罗\t您Hi您\t您Hi您\n",
+    )
+    check_query(
+        clickhouse_node,
+        f"SELECT hex(b), hex(c) FROM {db}.test_table_2 WHERE a = 1 FORMAT TSV",
+        "E4BDA0E5A5BD\t0FAAA00000000000DDCC\n",  # blob columns compared as hex; second value is the inserted literal with one leading 0
+    )
+    check_query(
+        clickhouse_node,
+        f"SELECT b, c, d, e, f FROM {db}.test_table_3 WHERE a = 1 FORMAT TSV",
+        "你好\t世界\thello\t您Hi您\t您Hi您\n",
+    )
+
+    # Phase 2: rows with a = 2 are inserted after the database exists (incremental replication).
+    mysql_node.query(
+        f"INSERT INTO {db}.test_table_1 VALUES (2, '你好', '世界', '哈罗', '您Hi您', '您Hi您')"
+    )
+    mysql_node.query(
+        f"INSERT INTO {db}.test_table_2 VALUES (2, '你好', 0xFAAA00000000000DDCC)"
+    )
+    mysql_node.query(
+        f"INSERT INTO {db}.test_table_3 VALUES (2, '你好', '世界', 'hello', '您Hi您', '您Hi您')"
+    )
+
+    check_query(
+        clickhouse_node,
+        f"SELECT b, c, d, e, f FROM {db}.test_table_1 WHERE a = 2 FORMAT TSV",
+        "你好\t世界\t哈罗\t您Hi您\t您Hi您\n",
+    )
+    check_query(
+        clickhouse_node,
+        f"SELECT hex(b), hex(c) FROM {db}.test_table_2 WHERE a = 2 FORMAT TSV",
+        "E4BDA0E5A5BD\t0FAAA00000000000DDCC\n",  # same expectation as for the a = 1 row
+    )
+    check_query(
+        clickhouse_node,
+        f"SELECT b, c, d, e, f FROM {db}.test_table_3 WHERE a = 2 FORMAT TSV",
+        "你好\t世界\thello\t您Hi您\t您Hi您\n",
+    )
+    clickhouse_node.query(f"DROP DATABASE {db}")
+    mysql_node.query(f"DROP DATABASE {db}")
+
+
 def select_without_columns(clickhouse_node, mysql_node, service_name):
     mysql_node.query("DROP DATABASE IF EXISTS db")
     clickhouse_node.query("DROP DATABASE IF EXISTS db")
diff --git a/tests/integration/test_materialized_mysql_database/test.py b/tests/integration/test_materialized_mysql_database/test.py
index 21316d1a474..df670d6e84d 100644
--- a/tests/integration/test_materialized_mysql_database/test.py
+++ b/tests/integration/test_materialized_mysql_database/test.py
@@ -262,6 +262,12 @@ def test_materialized_database_ddl_with_empty_transaction_8_0(
     )
 
 
+def test_text_blob_charset(started_cluster, started_mysql_8_0, clickhouse_node):  # runs the charset scenario against the MySQL 8.0 fixture only
+    materialized_with_ddl.text_blob_with_charset_test(
+        clickhouse_node, started_mysql_8_0, "mysql80"
+    )
+
+
 def test_select_without_columns_5_7(
     started_cluster, started_mysql_5_7, clickhouse_node
 ):
diff --git a/utils/check-mysql-binlog/main.cpp b/utils/check-mysql-binlog/main.cpp
index 68558340180..d1f868eba97 100644
--- a/utils/check-mysql-binlog/main.cpp
+++ b/utils/check-mysql-binlog/main.cpp
@@ -11,7 +11,9 @@
 #include <IO/WriteBufferFromFileDescriptor.h>
 #include <IO/WriteBufferFromOStream.h>
 #include <Core/MySQL/MySQLReplication.h>
+#include <Core/MySQL/MySQLCharset.h>
 
+static DB::MySQLCharsetPtr charset = std::make_shared<DB::MySQLCharset>(); /// Single instance passed to every TableMapEvent constructed in this file.
 static DB::MySQLReplication::BinlogEventPtr parseSingleEventBody(
     DB::MySQLReplication::EventHeader & header, DB::ReadBuffer & payload,
     std::shared_ptr<DB::MySQLReplication::TableMapEvent> & last_table_map_event, bool exist_checksum)
@@ -64,7 +66,7 @@ static DB::MySQLReplication::BinlogEventPtr parseSingleEventBody(
         {
             DB::MySQLReplication::TableMapEventHeader map_event_header;
             map_event_header.parse(*event_payload);
-            event = std::make_shared<DB::MySQLReplication::TableMapEvent>(std::move(header), map_event_header);
+            event = std::make_shared<DB::MySQLReplication::TableMapEvent>(std::move(header), map_event_header, charset);
             event->parseEvent(*event_payload);
             last_table_map_event = std::static_pointer_cast<DB::MySQLReplication::TableMapEvent>(event);
             break;

From 9b7ecbaa277bc29962cccd1e0faf379fbbef36af Mon Sep 17 00:00:00 2001
From: Chen768959 <67011523+Chen768959@users.noreply.github.com>
Date: Tue, 18 Jul 2023 22:00:47 +0800
Subject: [PATCH 107/242] fix issue#50582

The buildInputOrderInfo() method in optimizeReadInOrder.cpp adds constant columns to order_key_prefix_descr. However, since header_without_constants does not contain constant columns, the constant columns in description_sorted_ also need to be removed in advance.
---
 .../Transforms/FinishSortingTransform.cpp      | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/src/Processors/Transforms/FinishSortingTransform.cpp b/src/Processors/Transforms/FinishSortingTransform.cpp
index 05fddc35e15..dd61472bc37 100644
--- a/src/Processors/Transforms/FinishSortingTransform.cpp
+++ b/src/Processors/Transforms/FinishSortingTransform.cpp
@@ -35,9 +35,25 @@ FinishSortingTransform::FinishSortingTransform(
                         "Can't finish sorting. SortDescription "
                         "of already sorted stream is not prefix of SortDescription needed to sort");
 
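+    /// Remove constants from description_sorted_. NOTE(review): the local `map` below is never updated after construction, so every entry equals num_columns and no column passes the `new_pos < num_columns` check — confirm intent.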
+    SortDescription description_sorted_without_constants;
+    description_sorted_without_constants.reserve(description_sorted_.size());
+    size_t num_columns = header.columns();
+    ColumnNumbers map(num_columns, num_columns);
+    for (const auto & column_description : description_sorted_)
+    {
+        auto old_pos = header.getPositionByName(column_description.column_name);
+        auto new_pos = map[old_pos];
+
+        if (new_pos < num_columns)
+        {
+            description_sorted_without_constants.push_back(column_description);
+        }
+    }
+    
     /// The target description is modified in SortingTransform constructor.
     /// To avoid doing the same actions with description_sorted just copy it from prefix of target description.
-    for (const auto & column_sort_desc : description_sorted_)
+    for (const auto & column_sort_desc : description_sorted_without_constants)
         description_with_positions.emplace_back(column_sort_desc, header_without_constants.getPositionByName(column_sort_desc.column_name));
 }
 

From f091baaa0faf7fbd63460c486a4853bb66b34b5b Mon Sep 17 00:00:00 2001
From: Dmitry Kardymon <d.kardymon@arenadata.io>
Date: Tue, 18 Jul 2023 14:33:26 +0000
Subject: [PATCH 108/242] Remove whitespaces

---
 src/Common/SystemLogBase.cpp | 4 ++--
 src/Common/SystemLogBase.h   | 2 +-
 src/Interpreters/SystemLog.h | 4 ++--
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/Common/SystemLogBase.cpp b/src/Common/SystemLogBase.cpp
index e9442617acd..1d0673e30dd 100644
--- a/src/Common/SystemLogBase.cpp
+++ b/src/Common/SystemLogBase.cpp
@@ -196,8 +196,8 @@ SystemLogQueue<LogElement>::Index SystemLogQueue<LogElement>::pop(std::vector<Lo
 
 template <typename LogElement>
 void SystemLogQueue<LogElement>::shutdown()
-{ 
-    is_shutdown = true;         
+{
+    is_shutdown = true;
     /// Tell thread to shutdown.
     flush_event.notify_all();
 }
diff --git a/src/Common/SystemLogBase.h b/src/Common/SystemLogBase.h
index 281cd06354a..9436137d4a8 100644
--- a/src/Common/SystemLogBase.h
+++ b/src/Common/SystemLogBase.h
@@ -114,7 +114,7 @@ private:
     Index flushed_up_to = 0;
     // Logged overflow message at this queue front index
     Index logged_queue_full_at_index = -1;
-    
+
     bool is_shutdown = false;
 
     std::condition_variable flush_event;
diff --git a/src/Interpreters/SystemLog.h b/src/Interpreters/SystemLog.h
index fe9e4aa35d2..6f61e075b49 100644
--- a/src/Interpreters/SystemLog.h
+++ b/src/Interpreters/SystemLog.h
@@ -128,10 +128,10 @@ public:
     void notifyFlush(bool force);
 
     void stopFlushThread() override;
-  
+
 protected:
     Poco::Logger * log;
-  
+
     using ISystemLog::is_shutdown;
     using ISystemLog::saving_thread;
 

From 9d0391e6fbc4c88dd68a414f84ae285cb13f53f3 Mon Sep 17 00:00:00 2001
From: Dmitry Kardymon <d.kardymon@arenadata.io>
Date: Tue, 18 Jul 2023 18:13:57 +0000
Subject: [PATCH 109/242] Add ifndef to fix build

---
 src/Interpreters/SystemLog.cpp | 1 -
 src/Loggers/Loggers.cpp        | 2 ++
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/Interpreters/SystemLog.cpp b/src/Interpreters/SystemLog.cpp
index 3193baa551f..efeb22ba370 100644
--- a/src/Interpreters/SystemLog.cpp
+++ b/src/Interpreters/SystemLog.cpp
@@ -101,7 +101,6 @@ namespace
 namespace
 {
 
-constexpr size_t DEFAULT_SYSTEM_LOG_FLUSH_INTERVAL_MILLISECONDS = 7500;
 constexpr size_t DEFAULT_METRIC_LOG_COLLECT_INTERVAL_MILLISECONDS = 1000;
 
 /// Creates a system log with MergeTree engine using parameters from config
diff --git a/src/Loggers/Loggers.cpp b/src/Loggers/Loggers.cpp
index a9869847f65..4cc74902ee1 100644
--- a/src/Loggers/Loggers.cpp
+++ b/src/Loggers/Loggers.cpp
@@ -34,7 +34,9 @@ static std::string createDirectory(const std::string & file)
     return path;
 }
 
+#ifndef WITHOUT_TEXT_LOG
 constexpr size_t DEFAULT_SYSTEM_LOG_FLUSH_INTERVAL_MILLISECONDS = 7500;
+#endif
 
 void Loggers::buildLoggers(Poco::Util::AbstractConfiguration & config, Poco::Logger & logger /*_root*/, const std::string & cmd_name)
 {

From 9b951e965bd324635435ecb38f14971c6f1944a4 Mon Sep 17 00:00:00 2001
From: Dmitry Kardymon <d.kardymon@arenadata.io>
Date: Tue, 18 Jul 2023 19:48:43 +0000
Subject: [PATCH 110/242] Restore define

---
 src/Interpreters/SystemLog.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/Interpreters/SystemLog.cpp b/src/Interpreters/SystemLog.cpp
index efeb22ba370..3193baa551f 100644
--- a/src/Interpreters/SystemLog.cpp
+++ b/src/Interpreters/SystemLog.cpp
@@ -101,6 +101,7 @@ namespace
 namespace
 {
 
+constexpr size_t DEFAULT_SYSTEM_LOG_FLUSH_INTERVAL_MILLISECONDS = 7500;
 constexpr size_t DEFAULT_METRIC_LOG_COLLECT_INTERVAL_MILLISECONDS = 1000;
 
 /// Creates a system log with MergeTree engine using parameters from config

From 6fd27b6cd882b31f73ecd27ca7ae0bb2f0d25854 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@clickhouse.com>
Date: Tue, 18 Jul 2023 22:19:35 +0200
Subject: [PATCH 111/242] Fix build

---
 src/Storages/StorageMergeTree.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp
index 085d532b09c..32e100edc4d 100644
--- a/src/Storages/StorageMergeTree.cpp
+++ b/src/Storages/StorageMergeTree.cpp
@@ -45,6 +45,7 @@
 #include <Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.h>
 #include <fmt/core.h>
 
+
 namespace DB
 {
 
@@ -940,7 +941,7 @@ MergeMutateSelectedEntryPtr StorageMergeTree::selectPartsToMerge(
 
     SelectPartsDecision select_decision = SelectPartsDecision::CANNOT_SELECT;
 
-    auto is_background_memory_usage_ok = [](String * disable_reason) -> bool
+    auto is_background_memory_usage_ok = [](String & disable_reason) -> bool
     {
         if (canEnqueueBackgroundTask())
             return true;

From 3715c7f461dc9a0c48ea3cfac52ef52c47a53c64 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@clickhouse.com>
Date: Wed, 19 Jul 2023 01:08:14 +0200
Subject: [PATCH 112/242] Fix error in a test

---
 tests/queries/0_stateless/02293_selected_rows_and_merges.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/queries/0_stateless/02293_selected_rows_and_merges.sh b/tests/queries/0_stateless/02293_selected_rows_and_merges.sh
index 76c562c9744..2f281d27814 100755
--- a/tests/queries/0_stateless/02293_selected_rows_and_merges.sh
+++ b/tests/queries/0_stateless/02293_selected_rows_and_merges.sh
@@ -24,4 +24,4 @@ ${CLICKHOUSE_CLIENT} -q "system flush logs"
 
 # Here for mutation all values are 0, cause mutation is executed async.
 # It's pretty hard to write a test with total counter.
-${CLICKHOUSE_CLIENT} -q "select ProfileEvents['SelectedRows'] > 10, ProfileEvents['SelectedBytes'], ProfileEvents['MergedRows'], ProfileEvents['MergedUncompressedBytes'] from system.query_log where query_id = '$query_id' and type = 'QueryFinish' and query like 'alter%' and current_database = currentDatabase()"
+${CLICKHOUSE_CLIENT} -q "select ProfileEvents['SelectedRows'] > 10, ProfileEvents['SelectedBytes'] > 1000, ProfileEvents['MergedRows'], ProfileEvents['MergedUncompressedBytes'] from system.query_log where query_id = '$query_id' and type = 'QueryFinish' and query like 'alter%' and current_database = currentDatabase()"

From c724816cb8403c07d2d4c4601e0c4c9dcfc16e5f Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@clickhouse.com>
Date: Wed, 19 Jul 2023 01:15:16 +0200
Subject: [PATCH 113/242] Fix test

---
 .../configs/config.d/merge_tree.xml           |  5 +++++
 .../configs/config.d/users.xml                |  5 -----
 .../configs/config.xml                        | 22 -------------------
 .../test_merge_tree_s3_failover/test.py       |  1 +
 4 files changed, 6 insertions(+), 27 deletions(-)
 create mode 100644 tests/integration/test_merge_tree_s3_failover/configs/config.d/merge_tree.xml
 delete mode 100644 tests/integration/test_merge_tree_s3_failover/configs/config.d/users.xml
 delete mode 100644 tests/integration/test_merge_tree_s3_failover/configs/config.xml

diff --git a/tests/integration/test_merge_tree_s3_failover/configs/config.d/merge_tree.xml b/tests/integration/test_merge_tree_s3_failover/configs/config.d/merge_tree.xml
new file mode 100644
index 00000000000..c58c957b596
--- /dev/null
+++ b/tests/integration/test_merge_tree_s3_failover/configs/config.d/merge_tree.xml
@@ -0,0 +1,5 @@
+<clickhouse>
+    <merge_tree>
+        <ratio_of_defaults_for_sparse_serialization>1.0</ratio_of_defaults_for_sparse_serialization>
+    </merge_tree>
+</clickhouse>
diff --git a/tests/integration/test_merge_tree_s3_failover/configs/config.d/users.xml b/tests/integration/test_merge_tree_s3_failover/configs/config.d/users.xml
deleted file mode 100644
index 0011583a68c..00000000000
--- a/tests/integration/test_merge_tree_s3_failover/configs/config.d/users.xml
+++ /dev/null
@@ -1,5 +0,0 @@
-<clickhouse>
-    <profiles>
-        <default/>
-    </profiles>
-</clickhouse>
diff --git a/tests/integration/test_merge_tree_s3_failover/configs/config.xml b/tests/integration/test_merge_tree_s3_failover/configs/config.xml
deleted file mode 100644
index 743d75d9a21..00000000000
--- a/tests/integration/test_merge_tree_s3_failover/configs/config.xml
+++ /dev/null
@@ -1,22 +0,0 @@
-<clickhouse>
-    <tcp_port>9000</tcp_port>
-    <listen_host>127.0.0.1</listen_host>
-
-    <openSSL>
-        <client>
-            <cacheSessions>true</cacheSessions>
-            <verificationMode>none</verificationMode>
-            <invalidCertificateHandler>
-                <name>AcceptCertificateHandler</name>
-            </invalidCertificateHandler>
-        </client>
-    </openSSL>
-
-    <max_concurrent_queries>500</max_concurrent_queries>
-    <path>./clickhouse/</path>
-    <users_config>users.xml</users_config>
-
-    <merge_tree>
-        <ratio_of_defaults_for_sparse_serialization>1.0</ratio_of_defaults_for_sparse_serialization>
-    </merge_tree>
-</clickhouse>
diff --git a/tests/integration/test_merge_tree_s3_failover/test.py b/tests/integration/test_merge_tree_s3_failover/test.py
index 90dda631924..57ca5ed5ffd 100644
--- a/tests/integration/test_merge_tree_s3_failover/test.py
+++ b/tests/integration/test_merge_tree_s3_failover/test.py
@@ -67,6 +67,7 @@ def cluster():
                 "configs/config.d/storage_conf.xml",
                 "configs/config.d/instant_moves.xml",
                 "configs/config.d/part_log.xml",
+                "configs/config.d/merge_tree.xml"
             ],
             with_minio=True,
         )

From 3c8141529f0f8d4d7c48c077e91af77ee9885ad8 Mon Sep 17 00:00:00 2001
From: robot-clickhouse <robot-clickhouse@users.noreply.github.com>
Date: Tue, 18 Jul 2023 23:25:21 +0000
Subject: [PATCH 114/242] Automatic style fix

---
 tests/integration/test_merge_tree_s3_failover/test.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/integration/test_merge_tree_s3_failover/test.py b/tests/integration/test_merge_tree_s3_failover/test.py
index 57ca5ed5ffd..b47d741e78e 100644
--- a/tests/integration/test_merge_tree_s3_failover/test.py
+++ b/tests/integration/test_merge_tree_s3_failover/test.py
@@ -67,7 +67,7 @@ def cluster():
                 "configs/config.d/storage_conf.xml",
                 "configs/config.d/instant_moves.xml",
                 "configs/config.d/part_log.xml",
-                "configs/config.d/merge_tree.xml"
+                "configs/config.d/merge_tree.xml",
             ],
             with_minio=True,
         )

From d666272b7666967cf1d1bed3804673e3beb1ca64 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@clickhouse.com>
Date: Wed, 19 Jul 2023 05:29:12 +0200
Subject: [PATCH 115/242] Enable
 `allow_vertical_merges_from_compact_to_wide_parts` by default

---
 src/Storages/MergeTree/MergeTreeSettings.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/Storages/MergeTree/MergeTreeSettings.h b/src/Storages/MergeTree/MergeTreeSettings.h
index dc24327712c..783fde088dc 100644
--- a/src/Storages/MergeTree/MergeTreeSettings.h
+++ b/src/Storages/MergeTree/MergeTreeSettings.h
@@ -160,7 +160,7 @@ struct Settings;
     M(UInt64, min_marks_to_honor_max_concurrent_queries, 0, "Minimal number of marks to honor the MergeTree-level's max_concurrent_queries (0 - disabled). Queries will still be limited by other max_concurrent_queries settings.", 0) \
     M(UInt64, min_bytes_to_rebalance_partition_over_jbod, 0, "Minimal amount of bytes to enable part rebalance over JBOD array (0 - disabled).", 0) \
     M(Bool, check_sample_column_is_correct, true, "Check columns or columns by hash for sampling are unsigned integer.", 0) \
-    M(Bool, allow_vertical_merges_from_compact_to_wide_parts, false, "Allows vertical merges from compact to wide parts. This settings must have the same value on all replicas", 0) \
+    M(Bool, allow_vertical_merges_from_compact_to_wide_parts, true, "Allows vertical merges from compact to wide parts. This settings must have the same value on all replicas", 0) \
     M(Bool, enable_the_endpoint_id_with_zookeeper_name_prefix, false, "Enable the endpoint id with zookeeper name prefix for the replicated merge tree table", 0) \
     M(UInt64, zero_copy_merge_mutation_min_parts_size_sleep_before_lock, 1ULL * 1024 * 1024 * 1024, "If zero copy replication is enabled sleep random amount of time before trying to lock depending on parts size for merge or mutation", 0) \
     \
@@ -169,8 +169,9 @@ struct Settings;
     M(UInt64, part_moves_between_shards_delay_seconds, 30, "Time to wait before/after moving parts between shards.", 0) \
     M(Bool, use_metadata_cache, false, "Experimental feature to speed up parts loading process by using MergeTree metadata cache", 0) \
     M(Bool, allow_remote_fs_zero_copy_replication, false, "Don't use this setting in production, because it is not ready.", 0) \
-    M(String, remote_fs_zero_copy_zookeeper_path, "/clickhouse/zero_copy", "ZooKeeper path for Zero-copy table-independet info.", 0) \
+    M(String, remote_fs_zero_copy_zookeeper_path, "/clickhouse/zero_copy", "ZooKeeper path for zero-copy table-independent info.", 0) \
     M(Bool, remote_fs_zero_copy_path_compatible_mode, false, "Run zero-copy in compatible mode during conversion process.", 0) \
+    \
     /** Compress marks and primary key. */ \
     M(Bool, compress_marks, true, "Marks support compression, reduce mark file size and speed up network transmission.", 0) \
     M(Bool, compress_primary_key, true, "Primary key support compression, reduce primary key file size and speed up network transmission.", 0) \

From c3b8978023fae8adaa98a111f6253be50ee72a35 Mon Sep 17 00:00:00 2001
From: Amos Bird <amosbird@gmail.com>
Date: Wed, 19 Jul 2023 11:53:03 +0800
Subject: [PATCH 116/242] Don't use minmax_count projections when counting
 nullable columns

---
 .../optimizeUseAggregateProjection.cpp        | 32 ++++---------------
 ..._count_projection_count_nullable.reference |  1 +
 ...minmax_count_projection_count_nullable.sql |  9 ++++++
 3 files changed, 17 insertions(+), 25 deletions(-)
 create mode 100644 tests/queries/0_stateless/01710_minmax_count_projection_count_nullable.reference
 create mode 100644 tests/queries/0_stateless/01710_minmax_count_projection_count_nullable.sql

diff --git a/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp b/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp
index f183bdca7a9..4f25118958f 100644
--- a/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp
+++ b/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp
@@ -92,18 +92,6 @@ static AggregateProjectionInfo getAggregatingProjectionInfo(
     return info;
 }
 
-static bool hasNullableOrMissingColumn(const DAGIndex & index, const Names & names)
-{
-    for (const auto & query_name : names)
-    {
-        auto jt = index.find(query_name);
-        if (jt == index.end() || jt->second->result_type->isNullable())
-            return true;
-    }
-
-    return false;
-}
-
 struct AggregateFunctionMatch
 {
     const AggregateDescription * description = nullptr;
@@ -170,20 +158,14 @@ std::optional<AggregateFunctionMatches> matchAggregateFunctions(
             }
 
             /// This is a special case for the function count().
-            /// We can assume that 'count(expr) == count()' if expr is not nullable.
-            if (typeid_cast<const AggregateFunctionCount *>(candidate.function.get()))
+            /// We can assume that 'count(expr) == count()' if expr is not nullable,
+            /// which can be verified by simply casting to `AggregateFunctionCount *`.
+            if (typeid_cast<const AggregateFunctionCount *>(aggregate.function.get()))
             {
-                bool has_nullable_or_missing_arg = false;
-                has_nullable_or_missing_arg |= hasNullableOrMissingColumn(query_index, aggregate.argument_names);
-                has_nullable_or_missing_arg |= hasNullableOrMissingColumn(proj_index, candidate.argument_names);
-
-                if (!has_nullable_or_missing_arg)
-                {
-                    /// we can ignore arguments for count()
-                    found_match = true;
-                    res.push_back({&candidate, DataTypes()});
-                    break;
-                }
+                /// we can ignore arguments for count()
+                found_match = true;
+                res.push_back({&candidate, DataTypes()});
+                break;
             }
 
             /// Now, function names and types matched.
diff --git a/tests/queries/0_stateless/01710_minmax_count_projection_count_nullable.reference b/tests/queries/0_stateless/01710_minmax_count_projection_count_nullable.reference
new file mode 100644
index 00000000000..d00491fd7e5
--- /dev/null
+++ b/tests/queries/0_stateless/01710_minmax_count_projection_count_nullable.reference
@@ -0,0 +1 @@
+1
diff --git a/tests/queries/0_stateless/01710_minmax_count_projection_count_nullable.sql b/tests/queries/0_stateless/01710_minmax_count_projection_count_nullable.sql
new file mode 100644
index 00000000000..048d725e0a0
--- /dev/null
+++ b/tests/queries/0_stateless/01710_minmax_count_projection_count_nullable.sql
@@ -0,0 +1,9 @@
+DROP TABLE IF EXISTS test;
+
+CREATE TABLE test (`val` LowCardinality(Nullable(String))) ENGINE = MergeTree ORDER BY tuple() SETTINGS index_granularity = 8192;
+
+insert into test select number == 3 ? 'some value' : null from numbers(5);
+
+SELECT count(val) FROM test SETTINGS optimize_use_implicit_projections = 1;
+
+DROP TABLE test;

From 65de310137a4e192499119128aa069375eb007c8 Mon Sep 17 00:00:00 2001
From: Dmitry Kardymon <d.kardymon@arenadata.io>
Date: Wed, 19 Jul 2023 06:15:57 +0000
Subject: [PATCH 117/242] Return back SystemLogBase

---
 src/Common/SystemLogBase.cpp   | 40 +++++++++++++++++++++++++++++++++-
 src/Common/SystemLogBase.h     | 33 ++++++++++++++++++++++++++++
 src/Interpreters/SystemLog.cpp | 29 ++----------------------
 src/Interpreters/SystemLog.h   | 16 +++-----------
 4 files changed, 77 insertions(+), 41 deletions(-)

diff --git a/src/Common/SystemLogBase.cpp b/src/Common/SystemLogBase.cpp
index 1d0673e30dd..baee7021c35 100644
--- a/src/Common/SystemLogBase.cpp
+++ b/src/Common/SystemLogBase.cpp
@@ -202,7 +202,45 @@ void SystemLogQueue<LogElement>::shutdown()
     flush_event.notify_all();
 }
 
-#define INSTANTIATE_SYSTEM_LOG_BASE(ELEMENT) template class SystemLogQueue<ELEMENT>;
+template <typename LogElement>
+SystemLogBase<LogElement>::SystemLogBase(
+    const String& name,
+    size_t flush_interval_milliseconds_,
+    std::shared_ptr<SystemLogQueue<LogElement>> queue_)
+    : queue(queue_ ? queue_ : std::make_shared<SystemLogQueue<LogElement>>(name, flush_interval_milliseconds_))
+{
+}
+
+template <typename LogElement>
+void SystemLogBase<LogElement>::startup()
+{
+    std::lock_guard lock(queue->mutex);
+    saving_thread = std::make_unique<ThreadFromGlobalPool>([this] { savingThreadFunction(); });
+}
+
+template <typename LogElement>
+void SystemLogBase<LogElement>::add(const LogElement & element)
+{
+    queue->push(element);
+}
+
+template <typename LogElement>
+void SystemLogBase<LogElement>::flush(bool force)
+{
+    uint64_t this_thread_requested_offset = queue->notifyFlush(force);
+    if (this_thread_requested_offset == uint64_t(-1))
+        return;
+
+    queue->waitFlush(this_thread_requested_offset);
+}
+
+template <typename LogElement>
+void SystemLogBase<LogElement>::notifyFlush(bool force) { queue->notifyFlush(force); }
+
+#define INSTANTIATE_SYSTEM_LOG_BASE(ELEMENT) template class SystemLogBase<ELEMENT>;
 SYSTEM_LOG_ELEMENTS(INSTANTIATE_SYSTEM_LOG_BASE)
 
+#define INSTANTIATE_SYSTEM_LOG_QUEUE(ELEMENT) template class SystemLogQueue<ELEMENT>;
+SYSTEM_LOG_ELEMENTS(INSTANTIATE_SYSTEM_LOG_QUEUE)
+
 }
diff --git a/src/Common/SystemLogBase.h b/src/Common/SystemLogBase.h
index 9436137d4a8..5718182e115 100644
--- a/src/Common/SystemLogBase.h
+++ b/src/Common/SystemLogBase.h
@@ -121,4 +121,37 @@ private:
     const size_t flush_interval_milliseconds;
 };
 
+
+
+template <typename LogElement>
+class SystemLogBase : public ISystemLog
+{
+public:
+    using Self = SystemLogBase;
+
+    SystemLogBase(
+        const String& name,
+        size_t flush_interval_milliseconds_,
+        std::shared_ptr<SystemLogQueue<LogElement>> queue_ = nullptr);
+
+    void startup() override;
+
+    /** Append a record into log.
+      * Writing to table will be done asynchronously and in case of failure, record could be lost.
+      */
+    void add(const LogElement & element);
+
+    /// Flush data in the buffer to disk. Block the thread until the data is stored on disk.
+    void flush(bool force) override;
+
+    /// Non-blocking flush data in the buffer to disk.
+    void notifyFlush(bool force);
+
+    String getName() const override { return LogElement::name(); }
+
+    static const char * getDefaultOrderBy() { return "event_date, event_time"; }
+
+protected:
+    std::shared_ptr<SystemLogQueue<LogElement>> queue;
+};
 }
diff --git a/src/Interpreters/SystemLog.cpp b/src/Interpreters/SystemLog.cpp
index 3193baa551f..674210cbaad 100644
--- a/src/Interpreters/SystemLog.cpp
+++ b/src/Interpreters/SystemLog.cpp
@@ -334,23 +334,16 @@ SystemLog<LogElement>::SystemLog(
     const String & storage_def_,
     size_t flush_interval_milliseconds_,
     std::shared_ptr<SystemLogQueue<LogElement>> queue_)
-    : WithContext(context_)
+    : Base(database_name_ + "." + table_name_, flush_interval_milliseconds_, queue_)
+    , WithContext(context_)
     , log(&Poco::Logger::get("SystemLog (" + database_name_ + "." + table_name_ + ")"))
     , table_id(database_name_, table_name_)
     , storage_def(storage_def_)
     , create_query(serializeAST(*getCreateTableQuery()))
-    , queue(queue_ ? queue_ : std::make_shared<SystemLogQueue<LogElement>>(database_name_ + "." + table_name_, flush_interval_milliseconds_))
 {
     assert(database_name_ == DatabaseCatalog::SYSTEM_DATABASE);
 }
 
-template <typename LogElement>
-void SystemLog<LogElement>::startup()
-{
-    std::lock_guard lock(queue->mutex);
-    saving_thread = std::make_unique<ThreadFromGlobalPool>([this] { savingThreadFunction(); });
-}
-
 template <typename LogElement>
 void SystemLog<LogElement>::shutdown()
 {
@@ -618,24 +611,6 @@ ASTPtr SystemLog<LogElement>::getCreateTableQuery()
 
     return create;
 }
-template <typename LogElement>
-void SystemLog<LogElement>::add(const LogElement & element)
-{
-    queue->push(element);
-}
-
-template <typename LogElement>
-void SystemLog<LogElement>::flush(bool force)
-{
-    uint64_t this_thread_requested_offset = queue->notifyFlush(force);
-    if (this_thread_requested_offset == uint64_t(-1))
-        return;
-
-    queue->waitFlush(this_thread_requested_offset);
-}
-
-template <typename LogElement>
-void SystemLog<LogElement>::notifyFlush(bool force) { queue->notifyFlush(force); }
 
 #define INSTANTIATE_SYSTEM_LOG(ELEMENT) template class SystemLog<ELEMENT>;
 SYSTEM_LOG_ELEMENTS(INSTANTIATE_SYSTEM_LOG)
diff --git a/src/Interpreters/SystemLog.h b/src/Interpreters/SystemLog.h
index 6f61e075b49..91fb7f49221 100644
--- a/src/Interpreters/SystemLog.h
+++ b/src/Interpreters/SystemLog.h
@@ -89,10 +89,11 @@ struct SystemLogs
 
 
 template <typename LogElement>
-class SystemLog : public ISystemLog, private boost::noncopyable, WithContext
+class SystemLog : public SystemLogBase<LogElement>, private boost::noncopyable, WithContext
 {
 public:
     using Self = SystemLog;
+    using Base = SystemLogBase<LogElement>;
 
     /** Parameter: table name where to write log.
       * If table is not exists, then it get created with specified engine.
@@ -110,23 +111,12 @@ public:
         size_t flush_interval_milliseconds_,
         std::shared_ptr<SystemLogQueue<LogElement>> queue_ = nullptr);
 
-    void startup() override;
     /** Append a record into log.
       * Writing to table will be done asynchronously and in case of failure, record could be lost.
       */
-    void add(const LogElement & element);
 
     void shutdown() override;
 
-    String getName() const override { return LogElement::name(); }
-    static const char * getDefaultOrderBy() { return "event_date, event_time"; }
-
-    /// Flush data in the buffer to disk. Block the thread until the data is stored on disk.
-    void flush(bool force) override;
-
-    /// Non-blocking flush data in the buffer to disk.
-    void notifyFlush(bool force);
-
     void stopFlushThread() override;
 
 protected:
@@ -134,6 +124,7 @@ protected:
 
     using ISystemLog::is_shutdown;
     using ISystemLog::saving_thread;
+    using Base::queue;
 
 private:
 
@@ -144,7 +135,6 @@ private:
     String create_query;
     String old_create_query;
     bool is_prepared = false;
-    std::shared_ptr<SystemLogQueue<LogElement>> queue;
 
     /** Creates new table if it does not exist.
       * Renames old table if its structure is not suitable.

From ee0453ed00ab5ecb232557e29d4e1f6365d83cd0 Mon Sep 17 00:00:00 2001
From: Chen768959 <67011523+Chen768959@users.noreply.github.com>
Date: Wed, 19 Jul 2023 14:18:50 +0800
Subject: [PATCH 118/242] fix issue#50582 tests

Add tests reproducing issue #50582, which occurs when the sorting columns include a constant and FinishSortingTransform is triggered.
---
 .../02815_fix_not_found_constants_col_in_block.reference     | 2 ++
 .../02815_fix_not_found_constants_col_in_block.sql           | 5 +++++
 2 files changed, 7 insertions(+)
 create mode 100644 tests/queries/0_stateless/02815_fix_not_found_constants_col_in_block.reference
 create mode 100644 tests/queries/0_stateless/02815_fix_not_found_constants_col_in_block.sql

diff --git a/tests/queries/0_stateless/02815_fix_not_found_constants_col_in_block.reference b/tests/queries/0_stateless/02815_fix_not_found_constants_col_in_block.reference
new file mode 100644
index 00000000000..f2d4d23d9e3
--- /dev/null
+++ b/tests/queries/0_stateless/02815_fix_not_found_constants_col_in_block.reference
@@ -0,0 +1,2 @@
+\N	1	19000
+\N	1	19000
diff --git a/tests/queries/0_stateless/02815_fix_not_found_constants_col_in_block.sql b/tests/queries/0_stateless/02815_fix_not_found_constants_col_in_block.sql
new file mode 100644
index 00000000000..c56d59c72d6
--- /dev/null
+++ b/tests/queries/0_stateless/02815_fix_not_found_constants_col_in_block.sql
@@ -0,0 +1,5 @@
+DROP TABLE IF EXISTS t0;
+CREATE TABLE t0 (vkey UInt32, c0 Float32, primary key(c0)) engine = AggregatingMergeTree;
+insert into t0 values (19000, 1);
+select null as c_2_0, ref_2.c0 as c_2_1, ref_2.vkey as c_2_2 from t0 as ref_2 order by c_2_0 asc, c_2_1 asc, c_2_2 asc;
+select null as c_2_0, ref_2.c0 as c_2_1, ref_2.vkey as c_2_2 from t0 as ref_2 order by c_2_0 asc, c_2_1 asc;

From d601d86fad94250ca3b749baa4478679cd6e1973 Mon Sep 17 00:00:00 2001
From: Dmitry Kardymon <d.kardymon@arenadata.io>
Date: Wed, 19 Jul 2023 07:22:25 +0000
Subject: [PATCH 119/242] Remove empty line

---
 src/Common/SystemLogBase.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/Common/SystemLogBase.h b/src/Common/SystemLogBase.h
index 5718182e115..fa9f9b6f72e 100644
--- a/src/Common/SystemLogBase.h
+++ b/src/Common/SystemLogBase.h
@@ -122,7 +122,6 @@ private:
 };
 
 
-
 template <typename LogElement>
 class SystemLogBase : public ISystemLog
 {

From 549026f0ae8041ba40f4557922c480f2f07715bf Mon Sep 17 00:00:00 2001
From: Chen768959 <67011523+Chen768959@users.noreply.github.com>
Date: Wed, 19 Jul 2023 16:11:14 +0800
Subject: [PATCH 120/242] fix style error

Fix trailing whitespace.
---
 src/Processors/Transforms/FinishSortingTransform.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/Processors/Transforms/FinishSortingTransform.cpp b/src/Processors/Transforms/FinishSortingTransform.cpp
index dd61472bc37..066928446f2 100644
--- a/src/Processors/Transforms/FinishSortingTransform.cpp
+++ b/src/Processors/Transforms/FinishSortingTransform.cpp
@@ -50,7 +50,6 @@ FinishSortingTransform::FinishSortingTransform(
             description_sorted_without_constants.push_back(column_description);
         }
     }
-    
     /// The target description is modified in SortingTransform constructor.
     /// To avoid doing the same actions with description_sorted just copy it from prefix of target description.
     for (const auto & column_sort_desc : description_sorted_without_constants)

From aa888ad64a95ef801977844b2b253bb8162cfc1a Mon Sep 17 00:00:00 2001
From: Dmitry Kardymon <d.kardymon@arenadata.io>
Date: Wed, 19 Jul 2023 08:46:57 +0000
Subject: [PATCH 121/242] Separate thread mutex, add test

---
 src/Common/SystemLogBase.cpp                                   | 2 +-
 src/Common/SystemLogBase.h                                     | 3 ++-
 src/Interpreters/SystemLog.cpp                                 | 2 +-
 src/Interpreters/SystemLog.h                                   | 1 +
 tests/queries/0_stateless/02813_starting_in_text_log.reference | 1 +
 tests/queries/0_stateless/02813_starting_in_text_log.sql       | 2 ++
 6 files changed, 8 insertions(+), 3 deletions(-)
 create mode 100644 tests/queries/0_stateless/02813_starting_in_text_log.reference
 create mode 100755 tests/queries/0_stateless/02813_starting_in_text_log.sql

diff --git a/src/Common/SystemLogBase.cpp b/src/Common/SystemLogBase.cpp
index baee7021c35..bed6d661db7 100644
--- a/src/Common/SystemLogBase.cpp
+++ b/src/Common/SystemLogBase.cpp
@@ -214,7 +214,7 @@ SystemLogBase<LogElement>::SystemLogBase(
 template <typename LogElement>
 void SystemLogBase<LogElement>::startup()
 {
-    std::lock_guard lock(queue->mutex);
+    std::lock_guard lock(thread_mutex);
     saving_thread = std::make_unique<ThreadFromGlobalPool>([this] { savingThreadFunction(); });
 }
 
diff --git a/src/Common/SystemLogBase.h b/src/Common/SystemLogBase.h
index fa9f9b6f72e..0ac376769ad 100644
--- a/src/Common/SystemLogBase.h
+++ b/src/Common/SystemLogBase.h
@@ -67,6 +67,7 @@ public:
     virtual void savingThreadFunction() = 0;
 
 protected:
+    std::mutex thread_mutex;
     std::unique_ptr<ThreadFromGlobalPool> saving_thread;
 
     bool is_shutdown = false;
@@ -93,10 +94,10 @@ public:
     Index pop(std::vector<LogElement>& output, bool& should_prepare_tables_anyway, bool& exit_this_thread);
     void confirm(Index to_flush_end);
 
+private:
     /// Data shared between callers of add()/flush()/shutdown(), and the saving thread
     std::mutex mutex;
 
-private:
     Poco::Logger * log;
 
     // Queue is bounded. But its size is quite large to not block in all normal cases.
diff --git a/src/Interpreters/SystemLog.cpp b/src/Interpreters/SystemLog.cpp
index 674210cbaad..0b89b1dec26 100644
--- a/src/Interpreters/SystemLog.cpp
+++ b/src/Interpreters/SystemLog.cpp
@@ -358,7 +358,7 @@ template <typename LogElement>
 void SystemLog<LogElement>::stopFlushThread()
 {
     {
-        std::lock_guard lock(queue->mutex);
+        std::lock_guard lock(thread_mutex);
 
         if (!saving_thread || !saving_thread->joinable())
             return;
diff --git a/src/Interpreters/SystemLog.h b/src/Interpreters/SystemLog.h
index 91fb7f49221..5d8bb30150d 100644
--- a/src/Interpreters/SystemLog.h
+++ b/src/Interpreters/SystemLog.h
@@ -124,6 +124,7 @@ protected:
 
     using ISystemLog::is_shutdown;
     using ISystemLog::saving_thread;
+    using ISystemLog::thread_mutex;
     using Base::queue;
 
 private:
diff --git a/tests/queries/0_stateless/02813_starting_in_text_log.reference b/tests/queries/0_stateless/02813_starting_in_text_log.reference
new file mode 100644
index 00000000000..d00491fd7e5
--- /dev/null
+++ b/tests/queries/0_stateless/02813_starting_in_text_log.reference
@@ -0,0 +1 @@
+1
diff --git a/tests/queries/0_stateless/02813_starting_in_text_log.sql b/tests/queries/0_stateless/02813_starting_in_text_log.sql
new file mode 100755
index 00000000000..8ef78945a72
--- /dev/null
+++ b/tests/queries/0_stateless/02813_starting_in_text_log.sql
@@ -0,0 +1,2 @@
+SYSTEM FLUSH LOGS;
+SELECT count() > 0 FROM system.text_log WHERE event_date >= yesterday() AND message LIKE '%Application: Starting ClickHouse%';

From 53818dde8cef7dd573217fa049d01b233a076ac2 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Wed, 19 Jul 2023 15:22:25 +0200
Subject: [PATCH 122/242] MergeTree/ReplicatedMergeTree should use server
 timezone for log entries

Otherwise session_timezone/use_client_time_zone will break things

Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com>
---
 src/Storages/MergeTree/MergeTreeMutationEntry.cpp           | 2 +-
 src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.cpp      | 2 +-
 src/Storages/MergeTree/ReplicatedMergeTreeMutationEntry.cpp | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/Storages/MergeTree/MergeTreeMutationEntry.cpp b/src/Storages/MergeTree/MergeTreeMutationEntry.cpp
index cac26c5ac23..4dbccb91620 100644
--- a/src/Storages/MergeTree/MergeTreeMutationEntry.cpp
+++ b/src/Storages/MergeTree/MergeTreeMutationEntry.cpp
@@ -61,7 +61,7 @@ MergeTreeMutationEntry::MergeTreeMutationEntry(MutationCommands commands_, DiskP
     {
         auto out = disk->writeFile(std::filesystem::path(path_prefix) / file_name, DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Rewrite, settings);
         *out << "format version: 1\n"
-            << "create time: " << LocalDateTime(create_time) << "\n";
+            << "create time: " << LocalDateTime(create_time, DateLUT::serverTimezoneInstance()) << "\n";
         *out << "commands: ";
         commands.writeText(*out, /* with_pure_metadata_commands = */ false);
         *out << "\n";
diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.cpp
index ac956433eab..9eb8b6ce24c 100644
--- a/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.cpp
+++ b/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.cpp
@@ -48,7 +48,7 @@ void ReplicatedMergeTreeLogEntryData::writeText(WriteBuffer & out) const
         format_version = std::max<UInt8>(format_version, FORMAT_WITH_LOG_ENTRY_ID);
 
     out << "format version: " << format_version << "\n"
-        << "create_time: " << LocalDateTime(create_time ? create_time : time(nullptr)) << "\n"
+        << "create_time: " << LocalDateTime(create_time ? create_time : time(nullptr), DateLUT::serverTimezoneInstance()) << "\n"
         << "source replica: " << source_replica << '\n'
         << "block_id: " << escape << block_id << '\n';
 
diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeMutationEntry.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeMutationEntry.cpp
index 1bbb246338c..e2c23ecfe85 100644
--- a/src/Storages/MergeTree/ReplicatedMergeTreeMutationEntry.cpp
+++ b/src/Storages/MergeTree/ReplicatedMergeTreeMutationEntry.cpp
@@ -12,7 +12,7 @@ namespace DB
 void ReplicatedMergeTreeMutationEntry::writeText(WriteBuffer & out) const
 {
     out << "format version: 1\n"
-        << "create time: " << LocalDateTime(create_time ? create_time : time(nullptr)) << "\n"
+        << "create time: " << LocalDateTime(create_time ? create_time : time(nullptr), DateLUT::serverTimezoneInstance()) << "\n"
         << "source replica: " << source_replica << "\n"
         << "block numbers count: " << block_numbers.size() << "\n";
 

From 688b55b6ff80ee333ab9ef318d42937d5b5d3064 Mon Sep 17 00:00:00 2001
From: Dmitry Kardymon <d.kardymon@arenadata.io>
Date: Wed, 19 Jul 2023 13:29:07 +0000
Subject: [PATCH 123/242] Try to fix test, rename arg

---
 src/Common/SystemLogBase.cpp                             | 9 +++++----
 src/Common/SystemLogBase.h                               | 4 ++--
 src/Loggers/Loggers.cpp                                  | 6 +++---
 tests/queries/0_stateless/02813_starting_in_text_log.sql | 2 +-
 4 files changed, 11 insertions(+), 10 deletions(-)
 mode change 100755 => 100644 tests/queries/0_stateless/02813_starting_in_text_log.sql

diff --git a/src/Common/SystemLogBase.cpp b/src/Common/SystemLogBase.cpp
index bed6d661db7..8cf8103e1c7 100644
--- a/src/Common/SystemLogBase.cpp
+++ b/src/Common/SystemLogBase.cpp
@@ -41,9 +41,9 @@ ISystemLog::~ISystemLog() = default;
 
 template <typename LogElement>
 SystemLogQueue<LogElement>::SystemLogQueue(
-    const String & name_,
+    const String & table_name_,
     size_t flush_interval_milliseconds_)
-    : log(&Poco::Logger::get(name_))
+    : log(&Poco::Logger::get("SystemLogQueue (" + table_name_ + ")"))
     , flush_interval_milliseconds(flush_interval_milliseconds_)
 {}
 
@@ -120,6 +120,7 @@ void SystemLogQueue<LogElement>::push(const LogElement & element)
 template <typename LogElement>
 uint64_t SystemLogQueue<LogElement>::notifyFlush(bool should_prepare_tables_anyway)
 {
+    
     uint64_t this_thread_requested_offset;
 
     {
@@ -204,10 +205,10 @@ void SystemLogQueue<LogElement>::shutdown()
 
 template <typename LogElement>
 SystemLogBase<LogElement>::SystemLogBase(
-    const String& name,
+    const String& table_name_,
     size_t flush_interval_milliseconds_,
     std::shared_ptr<SystemLogQueue<LogElement>> queue_)
-    : queue(queue_ ? queue_ : std::make_shared<SystemLogQueue<LogElement>>(name, flush_interval_milliseconds_))
+    : queue(queue_ ? queue_ : std::make_shared<SystemLogQueue<LogElement>>(table_name_, flush_interval_milliseconds_))
 {
 }
 
diff --git a/src/Common/SystemLogBase.h b/src/Common/SystemLogBase.h
index 0ac376769ad..3716584be24 100644
--- a/src/Common/SystemLogBase.h
+++ b/src/Common/SystemLogBase.h
@@ -80,7 +80,7 @@ class SystemLogQueue
 
 public:
     SystemLogQueue(
-        const String & name_,
+        const String & table_name_,
         size_t flush_interval_milliseconds_);
 
     void shutdown();
@@ -130,7 +130,7 @@ public:
     using Self = SystemLogBase;
 
     SystemLogBase(
-        const String& name,
+        const String& table_name_,
         size_t flush_interval_milliseconds_,
         std::shared_ptr<SystemLogQueue<LogElement>> queue_ = nullptr);
 
diff --git a/src/Loggers/Loggers.cpp b/src/Loggers/Loggers.cpp
index 4cc74902ee1..85a8152602f 100644
--- a/src/Loggers/Loggers.cpp
+++ b/src/Loggers/Loggers.cpp
@@ -255,10 +255,10 @@ void Loggers::buildLoggers(Poco::Util::AbstractConfiguration & config, Poco::Log
 #ifndef WITHOUT_TEXT_LOG
     if (config.has("text_log"))
     {
-        String text_log_level_str = config.getString("text_log.level", "");
-        int text_log_level = text_log_level_str.empty() ? INT_MAX : Poco::Logger::parseLevel(text_log_level_str);
+        String text_log_level_str = config.getString("text_log.level", "trace");
+        int text_log_level = Poco::Logger::parseLevel(text_log_level_str);
         size_t flush_interval_milliseconds = config.getUInt64("text_log.flush_interval_milliseconds",
-                                                        DEFAULT_SYSTEM_LOG_FLUSH_INTERVAL_MILLISECONDS);
+            DEFAULT_SYSTEM_LOG_FLUSH_INTERVAL_MILLISECONDS);
         split->addTextLog(DB::TextLog::getLogQueue(flush_interval_milliseconds), text_log_level);
     }
 #endif
diff --git a/tests/queries/0_stateless/02813_starting_in_text_log.sql b/tests/queries/0_stateless/02813_starting_in_text_log.sql
old mode 100755
new mode 100644
index 8ef78945a72..e007f58189e
--- a/tests/queries/0_stateless/02813_starting_in_text_log.sql
+++ b/tests/queries/0_stateless/02813_starting_in_text_log.sql
@@ -1,2 +1,2 @@
 SYSTEM FLUSH LOGS;
-SELECT count() > 0 FROM system.text_log WHERE event_date >= yesterday() AND message LIKE '%Application: Starting ClickHouse%';
+SELECT count() > 0 FROM system.text_log WHERE event_date >= yesterday() AND message LIKE '%Starting ClickHouse%';

From 9f7e40e8e57cc5e8c997dff16b5c6645283ffcb3 Mon Sep 17 00:00:00 2001
From: Dmitry Kardymon <d.kardymon@arenadata.io>
Date: Wed, 19 Jul 2023 13:43:22 +0000
Subject: [PATCH 124/242] Remove empty line

---
 src/Common/SystemLogBase.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/Common/SystemLogBase.cpp b/src/Common/SystemLogBase.cpp
index 8cf8103e1c7..294ba09e375 100644
--- a/src/Common/SystemLogBase.cpp
+++ b/src/Common/SystemLogBase.cpp
@@ -120,7 +120,6 @@ void SystemLogQueue<LogElement>::push(const LogElement & element)
 template <typename LogElement>
 uint64_t SystemLogQueue<LogElement>::notifyFlush(bool should_prepare_tables_anyway)
 {
-    
     uint64_t this_thread_requested_offset;
 
     {

From 2b8e4ebd4c3df56c2d3e445321cedb157c7956f7 Mon Sep 17 00:00:00 2001
From: avogar <avogar@clickhouse.com>
Date: Wed, 19 Jul 2023 19:48:39 +0000
Subject: [PATCH 125/242] Allow to disable decoding/encoding path in uri in URL
 engine

---
 base/poco/Foundation/include/Poco/URI.h |  6 +++-
 base/poco/Foundation/src/URI.cpp        | 39 ++++++++++++++++++-------
 docs/en/operations/settings/settings.md |  6 ++++
 src/Core/Settings.h                     |  1 +
 src/IO/ReadWriteBufferFromHTTP.cpp      |  6 ++--
 src/Storages/StorageURL.cpp             |  2 +-
 6 files changed, 45 insertions(+), 15 deletions(-)

diff --git a/base/poco/Foundation/include/Poco/URI.h b/base/poco/Foundation/include/Poco/URI.h
index 1880af4ccd2..5e6e7efd938 100644
--- a/base/poco/Foundation/include/Poco/URI.h
+++ b/base/poco/Foundation/include/Poco/URI.h
@@ -57,7 +57,7 @@ public:
     URI();
     /// Creates an empty URI.
 
-    explicit URI(const std::string & uri);
+    explicit URI(const std::string & uri, bool decode_and_encode_path = true);
     /// Parses an URI from the given string. Throws a
     /// SyntaxException if the uri is not valid.
 
@@ -350,6 +350,8 @@ protected:
     static const std::string ILLEGAL;
 
 private:
+    void encodePath(std::string & encodedStr) const;
+
     std::string _scheme;
     std::string _userInfo;
     std::string _host;
@@ -357,6 +359,8 @@ private:
     std::string _path;
     std::string _query;
     std::string _fragment;
+
+    bool _decode_and_encode_path = true;
 };
 
 
diff --git a/base/poco/Foundation/src/URI.cpp b/base/poco/Foundation/src/URI.cpp
index 5543e02b279..91a82868dcf 100644
--- a/base/poco/Foundation/src/URI.cpp
+++ b/base/poco/Foundation/src/URI.cpp
@@ -36,8 +36,8 @@ URI::URI():
 }
 
 
-URI::URI(const std::string& uri):
-	_port(0)
+URI::URI(const std::string& uri, bool decode_and_encode_path):
+	_port(0), _decode_and_encode_path(decode_and_encode_path)
 {
 	parse(uri);
 }
@@ -107,7 +107,8 @@ URI::URI(const URI& uri):
 	_port(uri._port),
 	_path(uri._path),
 	_query(uri._query),
-	_fragment(uri._fragment)
+	_fragment(uri._fragment),
+    _decode_and_encode_path(uri._decode_and_encode_path)
 {
 }
 
@@ -119,7 +120,8 @@ URI::URI(const URI& baseURI, const std::string& relativeURI):
 	_port(baseURI._port),
 	_path(baseURI._path),
 	_query(baseURI._query),
-	_fragment(baseURI._fragment)
+	_fragment(baseURI._fragment),
+    _decode_and_encode_path(baseURI._decode_and_encode_path)
 {
 	resolve(relativeURI);
 }
@@ -151,6 +153,7 @@ URI& URI::operator = (const URI& uri)
 		_path     = uri._path;
 		_query    = uri._query;
 		_fragment = uri._fragment;
+        _decode_and_encode_path = uri._decode_and_encode_path;
 	}
 	return *this;
 }
@@ -181,6 +184,7 @@ void URI::swap(URI& uri)
 	std::swap(_path, uri._path);
 	std::swap(_query, uri._query);
 	std::swap(_fragment, uri._fragment);
+    std::swap(_decode_and_encode_path, uri._decode_and_encode_path);
 }
 
 
@@ -201,7 +205,7 @@ std::string URI::toString() const
 	std::string uri;
 	if (isRelative())
 	{
-		encode(_path, RESERVED_PATH, uri);
+		encodePath(uri);
 	}
 	else
 	{
@@ -217,7 +221,7 @@ std::string URI::toString() const
 		{
 			if (!auth.empty() && _path[0] != '/')
 				uri += '/';
-			encode(_path, RESERVED_PATH, uri);
+            encodePath(uri);
 		}
 		else if (!_query.empty() || !_fragment.empty())
 		{
@@ -313,7 +317,10 @@ void URI::setAuthority(const std::string& authority)
 void URI::setPath(const std::string& path)
 {
 	_path.clear();
-	decode(path, _path);
+    if (_decode_and_encode_path)
+	    decode(path, _path);
+    else
+        _path = path;
 }
 
 	
@@ -418,7 +425,7 @@ void URI::setPathEtc(const std::string& pathEtc)
 std::string URI::getPathEtc() const
 {
 	std::string pathEtc;
-	encode(_path, RESERVED_PATH, pathEtc);
+	encodePath(pathEtc);
 	if (!_query.empty())
 	{
 		pathEtc += '?';
@@ -436,7 +443,7 @@ std::string URI::getPathEtc() const
 std::string URI::getPathAndQuery() const
 {
 	std::string pathAndQuery;
-	encode(_path, RESERVED_PATH, pathAndQuery);
+	encodePath(pathAndQuery);
 	if (!_query.empty())
 	{
 		pathAndQuery += '?';
@@ -626,6 +633,8 @@ void URI::encode(const std::string& str, const std::string& reserved, std::strin
 	for (std::string::const_iterator it = str.begin(); it != str.end(); ++it)
 	{
 		char c = *it;
+        if (c == '%')
+            throw std::runtime_error("WTF");
 		if ((c >= 'a' && c <= 'z') ||
 		    (c >= 'A' && c <= 'Z') ||
 		    (c >= '0' && c <= '9') ||
@@ -681,6 +690,13 @@ void URI::decode(const std::string& str, std::string& decodedStr, bool plusAsSpa
 	}
 }
 
+void URI::encodePath(std::string & encodedStr) const
+{
+    if (_decode_and_encode_path)
+        encode(_path, RESERVED_PATH, encodedStr);
+    else
+        encodedStr = _path;
+}
 
 bool URI::isWellKnownPort() const
 {
@@ -820,7 +836,10 @@ void URI::parsePath(std::string::const_iterator& it, const std::string::const_it
 {
 	std::string path;
 	while (it != end && *it != '?' && *it != '#') path += *it++;
-	decode(path, _path);
+    if (_decode_and_encode_path)
+	    decode(path, _path);
+    else
+        _path = path;
 }
 
 
diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md
index 8b969f87a4d..db5d1a2f5d9 100644
--- a/docs/en/operations/settings/settings.md
+++ b/docs/en/operations/settings/settings.md
@@ -3466,6 +3466,12 @@ Possible values:
 
 Default value: `0`.
 
+## decode_and_encode_path_in_url {#decode_and_encode_path_in_url}
+
+Enables or disables decoding/encoding path in uri in [URL](../../engines/table-engines/special/url.md) engine tables.
+
+Enabled by default.
+
 ## database_atomic_wait_for_drop_and_detach_synchronously {#database_atomic_wait_for_drop_and_detach_synchronously}
 
 Adds a modifier `SYNC` to all `DROP` and `DETACH` queries.
diff --git a/src/Core/Settings.h b/src/Core/Settings.h
index 8f304f0aab6..ffa72d841be 100644
--- a/src/Core/Settings.h
+++ b/src/Core/Settings.h
@@ -621,6 +621,7 @@ class IColumn;
     M(Bool, engine_file_allow_create_multiple_files, false, "Enables or disables creating a new file on each insert in file engine tables if format has suffix.", 0) \
     M(Bool, engine_file_skip_empty_files, false, "Allows to skip empty files in file table engine", 0) \
     M(Bool, engine_url_skip_empty_files, false, "Allows to skip empty files in url table engine", 0) \
+    M(Bool, decode_and_encode_path_in_url, true, "Enables or disables decoding/encoding path in uri in URL table engine", 0) \
     M(Bool, allow_experimental_database_replicated, false, "Allow to create databases with Replicated engine", 0) \
     M(UInt64, database_replicated_initial_query_timeout_sec, 300, "How long initial DDL query should wait for Replicated database to precess previous DDL queue entries", 0) \
     M(Bool, database_replicated_enforce_synchronous_settings, false, "Enforces synchronous waiting for some queries (see also database_atomic_wait_for_drop_and_detach_synchronously, mutation_sync, alter_sync). Not recommended to enable these settings.", 0) \
diff --git a/src/IO/ReadWriteBufferFromHTTP.cpp b/src/IO/ReadWriteBufferFromHTTP.cpp
index 6d1c0f7aafa..eea801ce65e 100644
--- a/src/IO/ReadWriteBufferFromHTTP.cpp
+++ b/src/IO/ReadWriteBufferFromHTTP.cpp
@@ -305,12 +305,12 @@ void ReadWriteBufferFromHTTPBase<UpdatableSessionPtr>::callWithRedirects(Poco::N
         current_session = session;
 
     call(current_session, response, method_, throw_on_all_errors, for_object_info);
-    Poco::URI prev_uri = uri;
+    saved_uri_redirect = uri;
 
     while (isRedirect(response.getStatus()))
     {
-        Poco::URI uri_redirect = getUriAfterRedirect(prev_uri, response);
-        prev_uri = uri_redirect;
+        Poco::URI uri_redirect = getUriAfterRedirect(*saved_uri_redirect, response);
+        saved_uri_redirect = uri_redirect;
         if (remote_host_filter)
             remote_host_filter->checkURL(uri_redirect);
 
diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp
index e6953afe68e..4cfefbc5527 100644
--- a/src/Storages/StorageURL.cpp
+++ b/src/Storages/StorageURL.cpp
@@ -389,7 +389,7 @@ std::pair<Poco::URI, std::unique_ptr<ReadWriteBufferFromHTTP>> StorageURLSource:
     for (; option != end; ++option)
     {
         bool skip_url_not_found_error = glob_url && read_settings.http_skip_not_found_url_for_globs && option == std::prev(end);
-        auto request_uri = Poco::URI(*option);
+        auto request_uri = Poco::URI(*option, context->getSettingsRef().decode_and_encode_path_in_url);
 
         for (const auto & [param, value] : params)
             request_uri.addQueryParameter(param, value);

From 483ddb53ebfa01c02deda76a39bc44cc08df4f00 Mon Sep 17 00:00:00 2001
From: avogar <avogar@clickhouse.com>
Date: Wed, 19 Jul 2023 19:51:58 +0000
Subject: [PATCH 126/242] Fixes

---
 base/poco/Foundation/src/URI.cpp             | 2 --
 docs/en/engines/table-engines/special/url.md | 1 +
 docs/en/sql-reference/table-functions/url.md | 3 ++-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/base/poco/Foundation/src/URI.cpp b/base/poco/Foundation/src/URI.cpp
index 91a82868dcf..9bad1b39a87 100644
--- a/base/poco/Foundation/src/URI.cpp
+++ b/base/poco/Foundation/src/URI.cpp
@@ -633,8 +633,6 @@ void URI::encode(const std::string& str, const std::string& reserved, std::strin
 	for (std::string::const_iterator it = str.begin(); it != str.end(); ++it)
 	{
 		char c = *it;
-        if (c == '%')
-            throw std::runtime_error("WTF");
 		if ((c >= 'a' && c <= 'z') ||
 		    (c >= 'A' && c <= 'Z') ||
 		    (c >= '0' && c <= '9') ||
diff --git a/docs/en/engines/table-engines/special/url.md b/docs/en/engines/table-engines/special/url.md
index 26d4975954f..9f2bf177c96 100644
--- a/docs/en/engines/table-engines/special/url.md
+++ b/docs/en/engines/table-engines/special/url.md
@@ -106,3 +106,4 @@ For partitioning by month, use the `toYYYYMM(date_column)` expression, where `da
 ## Storage Settings {#storage-settings}
 
 - [engine_url_skip_empty_files](/docs/en/operations/settings/settings.md#engine_url_skip_empty_files) - allows to skip empty files while reading. Disabled by default.
+- [decode_and_encode_path_in_url](/docs/en/operations/settings/settings.md#decode_and_encode_path_in_url) - enables or disables decoding/encoding path in uri. Enabled by default.
diff --git a/docs/en/sql-reference/table-functions/url.md b/docs/en/sql-reference/table-functions/url.md
index 2ab43f1b895..96f36f03949 100644
--- a/docs/en/sql-reference/table-functions/url.md
+++ b/docs/en/sql-reference/table-functions/url.md
@@ -56,7 +56,8 @@ Character `|` inside patterns is used to specify failover addresses. They are it
 ## Storage Settings {#storage-settings}
 
 - [engine_url_skip_empty_files](/docs/en/operations/settings/settings.md#engine_url_skip_empty_files) - allows to skip empty files while reading. Disabled by default.
+- [decode_and_encode_path_in_url](/docs/en/operations/settings/settings.md#decode_and_encode_path_in_url) - enables or disables decoding/encoding path in uri. Enabled by default.
 
-**See Also**
+- **See Also**
 
 - [Virtual columns](/docs/en/engines/table-engines/index.md#table_engines-virtual_columns)

From ff235e0f3078f6c27a9a1ab1383a91378313ab77 Mon Sep 17 00:00:00 2001
From: Dmitry Kardymon <d.kardymon@arenadata.io>
Date: Thu, 20 Jul 2023 05:41:39 +0000
Subject: [PATCH 127/242] Turn off log in queue, fix data race

---
 src/Common/SystemLogBase.cpp | 9 +++++++--
 src/Common/SystemLogBase.h   | 3 ++-
 src/Interpreters/TextLog.h   | 6 ++++--
 3 files changed, 13 insertions(+), 5 deletions(-)

diff --git a/src/Common/SystemLogBase.cpp b/src/Common/SystemLogBase.cpp
index 294ba09e375..d1845a292b9 100644
--- a/src/Common/SystemLogBase.cpp
+++ b/src/Common/SystemLogBase.cpp
@@ -42,10 +42,14 @@ ISystemLog::~ISystemLog() = default;
 template <typename LogElement>
 SystemLogQueue<LogElement>::SystemLogQueue(
     const String & table_name_,
-    size_t flush_interval_milliseconds_)
+    size_t flush_interval_milliseconds_,
+    bool turn_off_logger_)
     : log(&Poco::Logger::get("SystemLogQueue (" + table_name_ + ")"))
     , flush_interval_milliseconds(flush_interval_milliseconds_)
-{}
+{
+    if (turn_off_logger_)
+        log->setLevel(0);
+}
 
 static thread_local bool recursive_push_call = false;
 
@@ -197,6 +201,7 @@ SystemLogQueue<LogElement>::Index SystemLogQueue<LogElement>::pop(std::vector<Lo
 template <typename LogElement>
 void SystemLogQueue<LogElement>::shutdown()
 {
+    std::unique_lock lock(mutex);
     is_shutdown = true;
     /// Tell thread to shutdown.
     flush_event.notify_all();
diff --git a/src/Common/SystemLogBase.h b/src/Common/SystemLogBase.h
index 3716584be24..f6e4a579edf 100644
--- a/src/Common/SystemLogBase.h
+++ b/src/Common/SystemLogBase.h
@@ -81,7 +81,8 @@ class SystemLogQueue
 public:
     SystemLogQueue(
         const String & table_name_,
-        size_t flush_interval_milliseconds_);
+        size_t flush_interval_milliseconds_,
+        bool turn_off_logger_ = false);
 
     void shutdown();
 
diff --git a/src/Interpreters/TextLog.h b/src/Interpreters/TextLog.h
index 0febce03abc..60ca11632aa 100644
--- a/src/Interpreters/TextLog.h
+++ b/src/Interpreters/TextLog.h
@@ -40,6 +40,8 @@ struct TextLogElement
 class TextLog : public SystemLog<TextLogElement>
 {
 public:
+    using Queue = SystemLogQueue<TextLogElement>;
+
     TextLog(
         ContextPtr context_,
         const String & database_name_,
@@ -47,9 +49,9 @@ public:
         const String & storage_def_,
         size_t flush_interval_milliseconds_);
 
-    static std::shared_ptr<SystemLogQueue<TextLogElement>> getLogQueue(size_t flush_interval_milliseconds)
+    static std::shared_ptr<Queue> getLogQueue(size_t flush_interval_milliseconds)
     {
-        static std::shared_ptr<SystemLogQueue<TextLogElement>> queue = std::make_shared<SystemLogQueue<TextLogElement>>("text_log", flush_interval_milliseconds);
+        static std::shared_ptr<Queue> queue = std::make_shared<Queue>("text_log", flush_interval_milliseconds, true);
         return queue;
     }
 };

From c7ab6e908adf2a088ad41e00ea2bfad5ea16526a Mon Sep 17 00:00:00 2001
From: Dmitry Kardymon <d.kardymon@arenadata.io>
Date: Thu, 20 Jul 2023 08:55:22 +0000
Subject: [PATCH 128/242] Move code to try to make the diff simpler

---
 src/Common/SystemLogBase.cpp | 60 ++++++++++++++++++------------------
 1 file changed, 30 insertions(+), 30 deletions(-)

diff --git a/src/Common/SystemLogBase.cpp b/src/Common/SystemLogBase.cpp
index d1845a292b9..ed5ffd78a7b 100644
--- a/src/Common/SystemLogBase.cpp
+++ b/src/Common/SystemLogBase.cpp
@@ -121,6 +121,36 @@ void SystemLogQueue<LogElement>::push(const LogElement & element)
         LOG_INFO(log, "Queue is half full for system log '{}'.", demangle(typeid(*this).name()));
 }
 
+template <typename LogElement>
+void SystemLogBase<LogElement>::flush(bool force)
+{
+    uint64_t this_thread_requested_offset = queue->notifyFlush(force);
+    if (this_thread_requested_offset == uint64_t(-1))
+        return;
+
+    queue->waitFlush(this_thread_requested_offset);
+}
+
+template <typename LogElement>
+void SystemLogQueue<LogElement>::waitFlush(uint64_t expected_flushed_up_to)
+{
+    // Use an arbitrary timeout to avoid endless waiting. 60s proved to be
+    // too fast for our parallel functional tests, probably because they
+    // heavily load the disk.
+    const int timeout_seconds = 180;
+    std::unique_lock lock(mutex);
+    bool result = flush_event.wait_for(lock, std::chrono::seconds(timeout_seconds), [&]
+    {
+        return flushed_up_to >= expected_flushed_up_to && !is_force_prepare_tables;
+    });
+
+    if (!result)
+    {
+        throw Exception(ErrorCodes::TIMEOUT_EXCEEDED, "Timeout exceeded ({} s) while flushing system log '{}'.",
+            toString(timeout_seconds), demangle(typeid(*this).name()));
+    }
+}
+
 template <typename LogElement>
 uint64_t SystemLogQueue<LogElement>::notifyFlush(bool should_prepare_tables_anyway)
 {
@@ -145,26 +175,6 @@ uint64_t SystemLogQueue<LogElement>::notifyFlush(bool should_prepare_tables_anyw
     return this_thread_requested_offset;
 }
 
-template <typename LogElement>
-void SystemLogQueue<LogElement>::waitFlush(uint64_t expected_flushed_up_to)
-{
-    // Use an arbitrary timeout to avoid endless waiting. 60s proved to be
-    // too fast for our parallel functional tests, probably because they
-    // heavily load the disk.
-    const int timeout_seconds = 180;
-    std::unique_lock lock(mutex);
-    bool result = flush_event.wait_for(lock, std::chrono::seconds(timeout_seconds), [&]
-    {
-        return flushed_up_to >= expected_flushed_up_to && !is_force_prepare_tables;
-    });
-
-    if (!result)
-    {
-        throw Exception(ErrorCodes::TIMEOUT_EXCEEDED, "Timeout exceeded ({} s) while flushing system log '{}'.",
-            toString(timeout_seconds), demangle(typeid(*this).name()));
-    }
-}
-
 template <typename LogElement>
 void SystemLogQueue<LogElement>::confirm(uint64_t to_flush_end)
 {
@@ -229,16 +239,6 @@ void SystemLogBase<LogElement>::add(const LogElement & element)
     queue->push(element);
 }
 
-template <typename LogElement>
-void SystemLogBase<LogElement>::flush(bool force)
-{
-    uint64_t this_thread_requested_offset = queue->notifyFlush(force);
-    if (this_thread_requested_offset == uint64_t(-1))
-        return;
-
-    queue->waitFlush(this_thread_requested_offset);
-}
-
 template <typename LogElement>
 void SystemLogBase<LogElement>::notifyFlush(bool force) { queue->notifyFlush(force); }
 

From 067e3caa2c43ed981a7c598c45668f37b0ac32c6 Mon Sep 17 00:00:00 2001
From: chen768959 <934103231@qq.com>
Date: Thu, 20 Jul 2023 18:13:19 +0800
Subject: [PATCH 129/242] Remove constants from description_sorted_.

---
 src/Processors/Transforms/FinishSortingTransform.cpp | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/src/Processors/Transforms/FinishSortingTransform.cpp b/src/Processors/Transforms/FinishSortingTransform.cpp
index 066928446f2..744d035d0ee 100644
--- a/src/Processors/Transforms/FinishSortingTransform.cpp
+++ b/src/Processors/Transforms/FinishSortingTransform.cpp
@@ -38,15 +38,11 @@ FinishSortingTransform::FinishSortingTransform(
     /// Remove constants from description_sorted_.
     SortDescription description_sorted_without_constants;
     description_sorted_without_constants.reserve(description_sorted_.size());
-    size_t num_columns = header.columns();
-    ColumnNumbers map(num_columns, num_columns);
     for (const auto & column_description : description_sorted_)
     {
-        auto old_pos = header.getPositionByName(column_description.column_name);
-        auto new_pos = map[old_pos];
+        auto pos = header.getPositionByName(column_description.column_name);
 
-        if (new_pos < num_columns)
-        {
+        if (!const_columns_to_remove[pos]){
             description_sorted_without_constants.push_back(column_description);
         }
     }

From 0ba97eeea597ad027c375cf292419dd555a9cb73 Mon Sep 17 00:00:00 2001
From: lgbo-ustc <lgbo.ustc@gmail.com>
Date: Thu, 15 Jun 2023 08:05:47 +0800
Subject: [PATCH 130/242] wip: grace hash join support full & right join

---
 docs/en/operations/settings/settings.md       |  2 +
 src/Interpreters/GraceHashJoin.cpp            | 31 +++++++--
 src/Interpreters/GraceHashJoin.h              |  3 +-
 .../Transforms/JoiningTransform.cpp           | 65 +++++++++++++++++--
 src/Processors/Transforms/JoiningTransform.h  | 24 ++++++-
 src/QueryPipeline/QueryPipelineBuilder.cpp    |  2 +-
 ...01721_join_implicit_cast_long.reference.j2 | 40 ------------
 .../01721_join_implicit_cast_long.sql.j2      |  1 -
 .../02273_full_sort_join.reference.j2         | 18 +----
 .../0_stateless/02273_full_sort_join.sql.j2   |  4 +-
 ...274_full_sort_join_nodistinct.reference.j2 | 34 +---------
 .../02274_full_sort_join_nodistinct.sql.j2    |  6 +-
 .../02275_full_sort_join_long.reference       | 24 ++++++-
 .../02275_full_sort_join_long.sql.j2          |  9 +--
 14 files changed, 138 insertions(+), 125 deletions(-)

diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md
index 22aeecf4335..580b51a984d 100644
--- a/docs/en/operations/settings/settings.md
+++ b/docs/en/operations/settings/settings.md
@@ -537,6 +537,8 @@ Possible values:
 
  The first phase of a grace join reads the right table and splits it into N buckets depending on the hash value of key columns (initially, N is `grace_hash_join_initial_buckets`). This is done in a way to ensure that each bucket can be processed independently. Rows from the first bucket are added to an in-memory hash table while the others are saved to disk. If the hash table grows beyond the memory limit (e.g., as set by [`max_bytes_in_join`](/docs/en/operations/settings/query-complexity.md/#settings-max_bytes_in_join)), the number of buckets is increased and the assigned bucket for each row. Any rows which don’t belong to the current bucket are flushed and reassigned.
 
+ Supports `INNER/LEFT/RIGHT/FULL ALL/ANY JOIN`.
+
 - hash
 
  [Hash join algorithm](https://en.wikipedia.org/wiki/Hash_join) is used. The most generic implementation that supports all combinations of kind and strictness and multiple join keys that are combined with `OR` in the `JOIN ON` section.
diff --git a/src/Interpreters/GraceHashJoin.cpp b/src/Interpreters/GraceHashJoin.cpp
index edf604bc0b4..f94453293f6 100644
--- a/src/Interpreters/GraceHashJoin.cpp
+++ b/src/Interpreters/GraceHashJoin.cpp
@@ -301,8 +301,10 @@ void GraceHashJoin::initBuckets()
 
 bool GraceHashJoin::isSupported(const std::shared_ptr<TableJoin> & table_join)
 {
+
     bool is_asof = (table_join->strictness() == JoinStrictness::Asof);
-    return !is_asof && isInnerOrLeft(table_join->kind()) && table_join->oneDisjunct();
+    auto kind = table_join->kind();
+    return !is_asof && (isInner(kind) || isLeft(kind) || isRight(kind) || isFull(kind)) && table_join->oneDisjunct();
 }
 
 GraceHashJoin::~GraceHashJoin() = default;
@@ -322,7 +324,6 @@ bool GraceHashJoin::hasMemoryOverflow(size_t total_rows, size_t total_bytes) con
     /// One row can't be split, avoid loop
     if (total_rows < 2)
         return false;
-
     bool has_overflow = !table_join->sizeLimits().softCheck(total_rows, total_bytes);
 
     if (has_overflow)
@@ -494,17 +495,30 @@ bool GraceHashJoin::alwaysReturnsEmptySet() const
     return hash_join_is_empty;
 }
 
-IBlocksStreamPtr GraceHashJoin::getNonJoinedBlocks(const Block &, const Block &, UInt64) const
+/// Each bucket are handled by the following steps
+/// 1. build hash_join by the right side blocks.
+/// 2. join left side with the hash_join,
+/// 3. read right non-joined blocks from hash_join.
+/// buckets are handled one by one, each hash_join will not be release before the right non-joined blocks are emitted.
+///
+/// There is a finished counter in JoiningTransform/DelayedJoinedBlocksWorkerTransform,
+/// only one processor could take the non-joined blocks from right stream, and ensure all rows from
+/// left stream have been emitted before this.
+IBlocksStreamPtr
+GraceHashJoin::getNonJoinedBlocks(const Block & left_sample_block_, const Block & result_sample_block_, UInt64 max_block_size_) const
 {
-    /// We do no support returning non joined blocks here.
-    /// TODO: They _should_ be reported by getDelayedBlocks instead
-    return nullptr;
+    return hash_join->getNonJoinedBlocks(left_sample_block_, result_sample_block_, max_block_size_);
 }
 
 class GraceHashJoin::DelayedBlocks : public IBlocksStream
 {
 public:
-    explicit DelayedBlocks(size_t current_bucket_, Buckets buckets_, InMemoryJoinPtr hash_join_, const Names & left_key_names_, const Names & right_key_names_)
+    explicit DelayedBlocks(
+        size_t current_bucket_,
+        Buckets buckets_,
+        InMemoryJoinPtr hash_join_,
+        const Names & left_key_names_,
+        const Names & right_key_names_)
         : current_bucket(current_bucket_)
         , buckets(std::move(buckets_))
         , hash_join(std::move(hash_join_))
@@ -522,12 +536,15 @@ public:
 
         do
         {
+            // One DelayedBlocks is shared among multiple DelayedJoinedBlocksWorkerTransform.
+            // There is a lock inside left_reader.read() .
             block = left_reader.read();
             if (!block)
             {
                 return {};
             }
 
+            // block comes from left_reader, need to join with right table to get the result.
             Blocks blocks = JoinCommon::scatterBlockByHash(left_key_names, block, num_buckets);
             block = std::move(blocks[current_idx]);
 
diff --git a/src/Interpreters/GraceHashJoin.h b/src/Interpreters/GraceHashJoin.h
index bce04ee6b04..ce519892b0e 100644
--- a/src/Interpreters/GraceHashJoin.h
+++ b/src/Interpreters/GraceHashJoin.h
@@ -13,7 +13,6 @@
 
 namespace DB
 {
-
 class TableJoin;
 class HashJoin;
 
@@ -79,7 +78,7 @@ public:
     bool supportTotals() const override { return false; }
 
     IBlocksStreamPtr
-    getNonJoinedBlocks(const Block & left_sample_block, const Block & result_sample_block, UInt64 max_block_size) const override;
+    getNonJoinedBlocks(const Block & left_sample_block_, const Block & result_sample_block_, UInt64 max_block_size) const override;
 
     /// Open iterator over joined blocks.
     /// Must be called after all @joinBlock calls.
diff --git a/src/Processors/Transforms/JoiningTransform.cpp b/src/Processors/Transforms/JoiningTransform.cpp
index 49b90d04b81..f1ceefbf229 100644
--- a/src/Processors/Transforms/JoiningTransform.cpp
+++ b/src/Processors/Transforms/JoiningTransform.cpp
@@ -189,7 +189,6 @@ void JoiningTransform::transform(Chunk & chunk)
     }
     else
         block = readExecute(chunk);
-
     auto num_rows = block.rows();
     chunk.setColumns(block.getColumns(), num_rows);
 }
@@ -311,8 +310,16 @@ void FillingRightJoinSideTransform::work()
 }
 
 
-DelayedJoinedBlocksWorkerTransform::DelayedJoinedBlocksWorkerTransform(Block output_header)
-    : IProcessor(InputPorts{Block()}, OutputPorts{output_header})
+DelayedJoinedBlocksWorkerTransform::DelayedJoinedBlocksWorkerTransform(
+    Block left_header_,
+    Block output_header_,
+    size_t max_block_size_,
+    JoinPtr join_)
+    : IProcessor(InputPorts{Block()}, OutputPorts{output_header_})
+    , left_header(left_header_)
+    , output_header(output_header_)
+    , max_block_size(max_block_size_)
+    , join(join_)
 {
 }
 
@@ -365,6 +372,7 @@ IProcessor::Status DelayedJoinedBlocksWorkerTransform::prepare()
 
         if (!data.chunk.hasChunkInfo())
             throw Exception(ErrorCodes::LOGICAL_ERROR, "DelayedJoinedBlocksWorkerTransform must have chunk info");
+
         task = std::dynamic_pointer_cast<const DelayedBlocksTask>(data.chunk.getChunkInfo());
     }
     else
@@ -387,11 +395,24 @@ void DelayedJoinedBlocksWorkerTransform::work()
     if (!task)
         return;
 
-    Block block = task->delayed_blocks->next();
+    Block block;
+    if (!left_delayed_stream_finished)
+    {
+        block = task->delayed_blocks->next();
 
+        if (!block)
+        {
+            left_delayed_stream_finished = true;
+            block = nextNonJoinedBlock();
+        }
+    }
+    else
+    {
+        block = nextNonJoinedBlock();
+    }
     if (!block)
     {
-        task.reset();
+        resetTask();
         return;
     }
 
@@ -400,6 +421,31 @@ void DelayedJoinedBlocksWorkerTransform::work()
     output_chunk.setColumns(block.getColumns(), rows);
 }
 
+void DelayedJoinedBlocksWorkerTransform::resetTask()
+{
+    task.reset();
+    left_delayed_stream_finished = false;
+    non_joined_delayed_stream = nullptr;
+}
+
+Block DelayedJoinedBlocksWorkerTransform::nextNonJoinedBlock()
+{
+    // Before read from non-joined stream, all blocks in left file reader must have been joined.
+    // For example, in HashJoin, it may return invalid mismatch rows from non-joined stream before
+    // the all blocks in left file reader have been finished, since the used flags are incomplete.
+    // To make only one processor could read from non-joined stream seems be a easy way.
+    if (!non_joined_delayed_stream && task && task->left_delayed_stream_finish_counter->isLast())
+    {
+        non_joined_delayed_stream = join->getNonJoinedBlocks(left_header, output_header, max_block_size);
+    }
+
+    if (non_joined_delayed_stream)
+    {
+        return non_joined_delayed_stream->next();
+    }
+    return {};
+}
+
 DelayedJoinedBlocksTransform::DelayedJoinedBlocksTransform(size_t num_streams, JoinPtr join_)
     : IProcessor(InputPorts{}, OutputPorts(num_streams, Block()))
     , join(std::move(join_))
@@ -433,6 +479,9 @@ IProcessor::Status DelayedJoinedBlocksTransform::prepare()
 
     if (finished)
     {
+        // Since have memory limit, cannot handle all buckets parallelly by different
+        // DelayedJoinedBlocksWorkerTransform. So send the same task to all outputs.
+        // Wait for all DelayedJoinedBlocksWorkerTransform be idle before getting next bucket.
         for (auto & output : outputs)
         {
             if (output.isFinished())
@@ -448,10 +497,14 @@ IProcessor::Status DelayedJoinedBlocksTransform::prepare()
 
     if (delayed_blocks)
     {
+        // This counter is used to ensure that only the last DelayedJoinedBlocksWorkerTransform
+        // can read the right non-joined blocks from the join.
+        auto left_delayed_stream_finished_counter = std::make_shared<JoiningTransform::FinishCounter>(outputs.size());
         for (auto & output : outputs)
         {
             Chunk chunk;
-            chunk.setChunkInfo(std::make_shared<DelayedBlocksTask>(delayed_blocks));
+            auto task = std::make_shared<DelayedBlocksTask>(delayed_blocks, left_delayed_stream_finished_counter);
+            chunk.setChunkInfo(task);
             output.push(std::move(chunk));
         }
         delayed_blocks = nullptr;
diff --git a/src/Processors/Transforms/JoiningTransform.h b/src/Processors/Transforms/JoiningTransform.h
index e7edff40c56..10b413ed4e5 100644
--- a/src/Processors/Transforms/JoiningTransform.h
+++ b/src/Processors/Transforms/JoiningTransform.h
@@ -116,9 +116,14 @@ class DelayedBlocksTask : public ChunkInfo
 public:
 
     explicit DelayedBlocksTask() : finished(true) {}
-    explicit DelayedBlocksTask(IBlocksStreamPtr delayed_blocks_) : delayed_blocks(std::move(delayed_blocks_)) {}
+    explicit DelayedBlocksTask(IBlocksStreamPtr delayed_blocks_, JoiningTransform::FinishCounterPtr left_delayed_stream_finish_counter_)
+        : delayed_blocks(std::move(delayed_blocks_))
+        , left_delayed_stream_finish_counter(left_delayed_stream_finish_counter_)
+    {
+    }
 
     IBlocksStreamPtr delayed_blocks = nullptr;
+    JoiningTransform::FinishCounterPtr left_delayed_stream_finish_counter = nullptr;
 
     bool finished = false;
 };
@@ -147,7 +152,11 @@ private:
 class DelayedJoinedBlocksWorkerTransform : public IProcessor
 {
 public:
-    explicit DelayedJoinedBlocksWorkerTransform(Block output_header);
+    explicit DelayedJoinedBlocksWorkerTransform(
+        Block left_header_,
+        Block output_header_,
+        size_t max_block_size_,
+        JoinPtr join_);
 
     String getName() const override { return "DelayedJoinedBlocksWorkerTransform"; }
 
@@ -155,10 +164,19 @@ public:
     void work() override;
 
 private:
+    Block left_header;
+    Block output_header;
+    size_t max_block_size;
+    JoinPtr join;
     DelayedBlocksTaskPtr task;
     Chunk output_chunk;
 
-    bool finished = false;
+    /// All joined and non-joined rows from left stream are emitted, only right non-joined rows are left
+    bool left_delayed_stream_finished = false;
+    IBlocksStreamPtr non_joined_delayed_stream = nullptr;
+
+    void resetTask();
+    Block nextNonJoinedBlock();
 };
 
 }
diff --git a/src/QueryPipeline/QueryPipelineBuilder.cpp b/src/QueryPipeline/QueryPipelineBuilder.cpp
index dedf85e409c..ba98d725532 100644
--- a/src/QueryPipeline/QueryPipelineBuilder.cpp
+++ b/src/QueryPipeline/QueryPipelineBuilder.cpp
@@ -491,7 +491,7 @@ std::unique_ptr<QueryPipelineBuilder> QueryPipelineBuilder::joinPipelinesRightLe
         if (delayed_root)
         {
             // Process delayed joined blocks when all JoiningTransform are finished.
-            auto delayed = std::make_shared<DelayedJoinedBlocksWorkerTransform>(joined_header);
+            auto delayed = std::make_shared<DelayedJoinedBlocksWorkerTransform>(left_header, joined_header, max_block_size, join);
             if (delayed->getInputs().size() != 1 || delayed->getOutputs().size() != 1)
                 throw Exception(ErrorCodes::LOGICAL_ERROR, "DelayedJoinedBlocksWorkerTransform should have one input and one output");
 
diff --git a/tests/queries/0_stateless/01721_join_implicit_cast_long.reference.j2 b/tests/queries/0_stateless/01721_join_implicit_cast_long.reference.j2
index e9f32087439..ae43aa7195c 100644
--- a/tests/queries/0_stateless/01721_join_implicit_cast_long.reference.j2
+++ b/tests/queries/0_stateless/01721_join_implicit_cast_long.reference.j2
@@ -1,7 +1,6 @@
 {% for join_algorithm in ['hash', 'partial_merge', 'auto', 'full_sorting_merge', 'grace_hash'] -%}
 === {{ join_algorithm }} ===
 = full =
-{% if join_algorithm not in ['grace_hash'] -%}
 -4	0	196
 -3	0	197
 -2	0	198
@@ -17,7 +16,6 @@
 8	108	\N
 9	109	\N
 10	110	\N
-{% endif -%}
 = left =
 1	101	201
 2	102	202
@@ -30,7 +28,6 @@
 9	109	\N
 10	110	\N
 = right =
-{% if join_algorithm not in ['grace_hash'] -%}
 -4	0	196
 -3	0	197
 -2	0	198
@@ -41,7 +38,6 @@
 3	103	203
 4	104	204
 5	105	205
-{% endif -%}
 = inner =
 1	101	201
 2	102	202
@@ -49,7 +45,6 @@
 4	104	204
 5	105	205
 = full =
-{% if join_algorithm not in ['grace_hash'] -%}
 0	0	-4
 0	0	-3
 0	0	-2
@@ -65,7 +60,6 @@
 8	8	0
 9	9	0
 10	10	0
-{% endif -%}
 = left =
 1	1	1
 2	2	2
@@ -78,7 +72,6 @@
 9	9	0
 10	10	0
 = right =
-{% if join_algorithm not in ['grace_hash'] -%}
 0	0	-4
 0	0	-3
 0	0	-2
@@ -89,7 +82,6 @@
 3	3	3
 4	4	4
 5	5	5
-{% endif -%}
 = inner =
 1	1	1
 2	2	2
@@ -98,7 +90,6 @@
 5	5	5
 = join on =
 = full =
-{% if join_algorithm not in ['grace_hash'] -%}
 0	0	-4	196
 0	0	-3	197
 0	0	-2	198
@@ -114,7 +105,6 @@
 8	108	0	\N
 9	109	0	\N
 10	110	0	\N
-{% endif -%}
 = left =
 1	101	1	201
 2	102	2	202
@@ -127,7 +117,6 @@
 9	109	0	\N
 10	110	0	\N
 = right =
-{% if join_algorithm not in ['grace_hash'] -%}
 0	0	-4	196
 0	0	-3	197
 0	0	-2	198
@@ -138,7 +127,6 @@
 3	103	3	203
 4	104	4	204
 5	105	5	205
-{% endif -%}
 = inner =
 1	101	1	201
 2	102	2	202
@@ -146,7 +134,6 @@
 4	104	4	204
 5	105	5	205
 = full =
-{% if join_algorithm not in ['grace_hash'] -%}
 0	0	-4	196
 0	0	-3	197
 0	0	-2	198
@@ -162,7 +149,6 @@
 8	108	0	\N
 9	109	0	\N
 10	110	0	\N
-{% endif -%}
 = left =
 1	101	1	201
 2	102	2	202
@@ -175,7 +161,6 @@
 9	109	0	\N
 10	110	0	\N
 = right =
-{% if join_algorithm not in ['grace_hash'] -%}
 0	0	-4	196
 0	0	-3	197
 0	0	-2	198
@@ -186,7 +171,6 @@
 3	103	3	203
 4	104	4	204
 5	105	5	205
-{% endif -%}
 = inner =
 1	101	1	201
 2	102	2	202
@@ -196,7 +180,6 @@
 = agg =
 1
 1
-{% if join_algorithm not in ['grace_hash'] -%}
 1
 1
 1
@@ -205,13 +188,11 @@
 1	55	1055
 0	0	-10	0	990
 1	55	15	1055	1015
-{% endif -%}
 = types =
 1
 1
 1
 1
-{% if join_algorithm not in ['grace_hash'] -%}
 1
 1
 1
@@ -219,11 +200,9 @@
 1
 1
 1
-{% endif -%}
 {% if join_algorithm not in ['full_sorting_merge'] -%}
 === join use nulls ===
 = full =
-{% if join_algorithm not in ['grace_hash'] -%}
 -4	\N	196
 -3	\N	197
 -2	\N	198
@@ -239,7 +218,6 @@
 8	108	\N
 9	109	\N
 10	110	\N
-{% endif -%}
 = left =
 1	101	201
 2	102	202
@@ -252,7 +230,6 @@
 9	109	\N
 10	110	\N
 = right =
-{% if join_algorithm not in ['grace_hash'] -%}
 -4	\N	196
 -3	\N	197
 -2	\N	198
@@ -263,7 +240,6 @@
 3	103	203
 4	104	204
 5	105	205
-{% endif -%}
 = inner =
 1	101	201
 2	102	202
@@ -271,7 +247,6 @@
 4	104	204
 5	105	205
 = full =
-{% if join_algorithm not in ['grace_hash'] -%}
 1	1	1
 2	2	2
 3	3	3
@@ -287,7 +262,6 @@
 \N	\N	-2
 \N	\N	-1
 \N	\N	0
-{% endif -%}
 = left =
 1	1	1
 2	2	2
@@ -300,7 +274,6 @@
 9	9	\N
 10	10	\N
 = right =
-{% if join_algorithm not in ['grace_hash'] -%}
 1	1	1
 2	2	2
 3	3	3
@@ -311,7 +284,6 @@
 \N	\N	-2
 \N	\N	-1
 \N	\N	0
-{% endif -%}
 = inner =
 1	1	1
 2	2	2
@@ -320,7 +292,6 @@
 5	5	5
 = join on =
 = full =
-{% if join_algorithm not in ['grace_hash'] -%}
 1	101	1	201
 2	102	2	202
 3	103	3	203
@@ -336,7 +307,6 @@
 \N	\N	-2	198
 \N	\N	-1	199
 \N	\N	0	200
-{% endif -%}
 = left =
 1	101	1	201
 2	102	2	202
@@ -349,7 +319,6 @@
 9	109	\N	\N
 10	110	\N	\N
 = right =
-{% if join_algorithm not in ['grace_hash'] -%}
 1	101	1	201
 2	102	2	202
 3	103	3	203
@@ -360,7 +329,6 @@
 \N	\N	-2	198
 \N	\N	-1	199
 \N	\N	0	200
-{% endif -%}
 = inner =
 1	101	1	201
 2	102	2	202
@@ -368,7 +336,6 @@
 4	104	4	204
 5	105	5	205
 = full =
-{% if join_algorithm not in ['grace_hash'] -%}
 1	101	1	201
 2	102	2	202
 3	103	3	203
@@ -384,7 +351,6 @@
 \N	\N	-2	198
 \N	\N	-1	199
 \N	\N	0	200
-{% endif -%}
 = left =
 1	101	1	201
 2	102	2	202
@@ -397,7 +363,6 @@
 9	109	\N	\N
 10	110	\N	\N
 = right =
-{% if join_algorithm not in ['grace_hash'] -%}
 1	101	1	201
 2	102	2	202
 3	103	3	203
@@ -408,7 +373,6 @@
 \N	\N	-2	198
 \N	\N	-1	199
 \N	\N	0	200
-{% endif -%}
 = inner =
 1	101	1	201
 2	102	2	202
@@ -418,7 +382,6 @@
 = agg =
 1
 1
-{% if join_algorithm not in ['grace_hash'] -%}
 1
 1
 1
@@ -427,13 +390,11 @@
 1	55	1055
 1	55	15	1055	1015
 \N	\N	-10	\N	990
-{% endif -%}
 = types =
 1
 1
 1
 1
-{% if join_algorithm not in ['grace_hash'] -%}
 1
 1
 1
@@ -442,5 +403,4 @@
 1
 1
 {% endif -%}
-{% endif -%}
 {% endfor -%}
diff --git a/tests/queries/0_stateless/01721_join_implicit_cast_long.sql.j2 b/tests/queries/0_stateless/01721_join_implicit_cast_long.sql.j2
index f5321939f28..38f71f4c5ec 100644
--- a/tests/queries/0_stateless/01721_join_implicit_cast_long.sql.j2
+++ b/tests/queries/0_stateless/01721_join_implicit_cast_long.sql.j2
@@ -10,7 +10,6 @@ INSERT INTO t1 SELECT number as a, 100 + number as b FROM system.numbers LIMIT 1
 INSERT INTO t2 SELECT number - 5 as a, 200 + number - 5 as b FROM system.numbers LIMIT 1, 10;
 
 {% macro is_implemented(join_algorithm) -%}
-{% if join_algorithm == 'grace_hash' %} -- { serverError NOT_IMPLEMENTED } {% endif %}
 {% endmacro -%}
 
 {% for join_algorithm in ['hash', 'partial_merge', 'auto', 'full_sorting_merge', 'grace_hash'] -%}
diff --git a/tests/queries/0_stateless/02273_full_sort_join.reference.j2 b/tests/queries/0_stateless/02273_full_sort_join.reference.j2
index 98bfd9d9b2b..0af4158e971 100644
--- a/tests/queries/0_stateless/02273_full_sort_join.reference.j2
+++ b/tests/queries/0_stateless/02273_full_sort_join.reference.j2
@@ -1,7 +1,7 @@
 {% set table_size = 15 -%}
 {% for join_algorithm in ['default', 'full_sorting_merge', 'grace_hash'] -%}
 -- {{ join_algorithm }} --
-{% for block_size in range(1, table_size + 1) -%}
+{% for block_size in range(1, table_size + 1, 4) -%}
 ALL INNER USING | bs = {{ block_size }}
 4	0	0
 5	0	0
@@ -50,7 +50,6 @@ ALL LEFT | bs = {{ block_size }}
 14	14	val9	0
 14	14	val9	0
 ALL RIGHT | bs = {{ block_size }}
-{% if join_algorithm != 'grace_hash' -%}
 4	4	0	val10
 5	5	0	val6
 6	6	0	val8
@@ -64,7 +63,6 @@ ALL RIGHT | bs = {{ block_size }}
 13	13	0	val9
 14	14	0	val3
 14	14	0	val7
-{% endif -%}
 ALL INNER | bs = {{ block_size }} | copmosite key
 2	2	2	2	2	2	0	0
 2	2	2	2	2	2	0	0
@@ -85,7 +83,6 @@ ALL LEFT | bs = {{ block_size }} | copmosite key
 2	2	2	2	2	2	val12	0
 2	2	2	2	2	2	val9	0
 ALL RIGHT | bs = {{ block_size }} | copmosite key
-{% if join_algorithm != 'grace_hash' -%}
 0	\N	0	1	1	1	1	val2
 0	\N	0	1	1	1	1	val7
 0	\N	0	1	1	2	1	val5
@@ -99,7 +96,6 @@ ALL RIGHT | bs = {{ block_size }} | copmosite key
 0	\N	0	2	2	\N	1	val9
 2	2	2	2	2	2	0	val4
 2	2	2	2	2	2	0	val4
-{% endif -%}
 ANY INNER USING | bs = {{ block_size }}
 4	0	0
 5	0	0
@@ -137,7 +133,6 @@ ANY LEFT | bs = {{ block_size }}
 13	13	val13	0
 14	14	val9	0
 ANY RIGHT | bs = {{ block_size }}
-{% if join_algorithm != 'grace_hash' -%}
 4	4	0	val10
 5	5	0	val6
 6	6	0	val8
@@ -150,7 +145,6 @@ ANY RIGHT | bs = {{ block_size }}
 13	13	0	val9
 14	14	0	val3
 14	14	0	val7
-{% endif -%}
 ANY INNER | bs = {{ block_size }} | copmosite key
 2	2	2	2	2	2	0	0
 ANY LEFT | bs = {{ block_size }} | copmosite key
@@ -170,7 +164,6 @@ ANY LEFT | bs = {{ block_size }} | copmosite key
 2	2	2	2	2	2	val12	0
 2	2	2	2	2	2	val9	0
 ANY RIGHT | bs = {{ block_size }} | copmosite key
-{% if join_algorithm != 'grace_hash' -%}
 0	\N	0	1	1	1	1	val2
 0	\N	0	1	1	1	1	val7
 0	\N	0	1	1	2	1	val5
@@ -183,7 +176,6 @@ ANY RIGHT | bs = {{ block_size }} | copmosite key
 0	\N	0	2	1	\N	1	val3
 0	\N	0	2	2	\N	1	val9
 2	2	2	2	2	2	0	val4
-{% endif -%}
 {% endfor -%}
 ALL INNER | join_use_nulls = 1
 4	4	0	0
@@ -219,7 +211,6 @@ ALL LEFT | join_use_nulls = 1
 14	14	val9	0
 14	14	val9	0
 ALL RIGHT | join_use_nulls = 1
-{% if join_algorithm != 'grace_hash' -%}
 4	4	0	val10
 5	5	0	val6
 6	6	0	val8
@@ -233,7 +224,6 @@ ALL RIGHT | join_use_nulls = 1
 13	13	0	val9
 14	14	0	val3
 14	14	0	val7
-{% endif -%}
 ALL INNER | join_use_nulls = 1 | copmosite key
 2	2	2	2	2	2	0	0
 2	2	2	2	2	2	0	0
@@ -254,7 +244,6 @@ ALL LEFT | join_use_nulls = 1 | copmosite key
 2	2	2	2	2	2	val12	0
 2	2	2	2	2	2	val9	0
 ALL RIGHT | join_use_nulls = 1 | copmosite key
-{% if join_algorithm != 'grace_hash' -%}
 2	2	2	2	2	2	0	val4
 2	2	2	2	2	2	0	val4
 \N	\N	\N	1	1	1	\N	val2
@@ -268,7 +257,6 @@ ALL RIGHT | join_use_nulls = 1 | copmosite key
 \N	\N	\N	2	1	2	\N	val8
 \N	\N	\N	2	1	\N	\N	val3
 \N	\N	\N	2	2	\N	\N	val9
-{% endif -%}
 ANY INNER | join_use_nulls = 1
 4	4	0	0
 5	5	0	0
@@ -296,7 +284,6 @@ ANY LEFT | join_use_nulls = 1
 13	13	val13	0
 14	14	val9	0
 ANY RIGHT | join_use_nulls = 1
-{% if join_algorithm != 'grace_hash' -%}
 4	4	0	val10
 5	5	0	val6
 6	6	0	val8
@@ -309,7 +296,6 @@ ANY RIGHT | join_use_nulls = 1
 13	13	0	val9
 14	14	0	val3
 14	14	0	val7
-{% endif -%}
 ANY INNER | join_use_nulls = 1 | copmosite key
 2	2	2	2	2	2	0	0
 ANY LEFT | join_use_nulls = 1 | copmosite key
@@ -329,7 +315,6 @@ ANY LEFT | join_use_nulls = 1 | copmosite key
 2	2	2	2	2	2	val12	0
 2	2	2	2	2	2	val9	0
 ANY RIGHT | join_use_nulls = 1 | copmosite key
-{% if join_algorithm != 'grace_hash' -%}
 2	2	2	2	2	2	0	val4
 \N	\N	\N	1	1	1	\N	val2
 \N	\N	\N	1	1	1	\N	val7
@@ -342,5 +327,4 @@ ANY RIGHT | join_use_nulls = 1 | copmosite key
 \N	\N	\N	2	1	2	\N	val8
 \N	\N	\N	2	1	\N	\N	val3
 \N	\N	\N	2	2	\N	\N	val9
-{% endif -%}
 {% endfor -%}
diff --git a/tests/queries/0_stateless/02273_full_sort_join.sql.j2 b/tests/queries/0_stateless/02273_full_sort_join.sql.j2
index 43f7354017c..6b6aa53836e 100644
--- a/tests/queries/0_stateless/02273_full_sort_join.sql.j2
+++ b/tests/queries/0_stateless/02273_full_sort_join.sql.j2
@@ -28,9 +28,7 @@ INSERT INTO t2
         'val' || toString(number) as s
     FROM numbers_mt({{ table_size - 3 }});
 
-
 {% macro is_implemented(join_algorithm) -%}
-{% if join_algorithm == 'grace_hash' %} -- { serverError NOT_IMPLEMENTED } {% endif %}
 {% endmacro -%}
 
 {% for join_algorithm in ['default', 'full_sorting_merge', 'grace_hash'] -%}
@@ -40,7 +38,7 @@ SET max_bytes_in_join = '{% if join_algorithm == 'grace_hash' %}10K{% else %}0{%
 SELECT '-- {{ join_algorithm }} --';
 SET join_algorithm = '{{ join_algorithm }}';
 
-{% for block_size in range(1, table_size + 1) -%}
+{% for block_size in range(1, table_size + 1, 4) -%}
 {% for kind in ['ALL', 'ANY'] -%}
 
 SET max_block_size = {{ block_size }};
diff --git a/tests/queries/0_stateless/02274_full_sort_join_nodistinct.reference.j2 b/tests/queries/0_stateless/02274_full_sort_join_nodistinct.reference.j2
index 2cc6c6e85d6..df968e86e8d 100644
--- a/tests/queries/0_stateless/02274_full_sort_join_nodistinct.reference.j2
+++ b/tests/queries/0_stateless/02274_full_sort_join_nodistinct.reference.j2
@@ -1,6 +1,6 @@
 {% for join_algorithm in ['full_sorting_merge', 'grace_hash'] -%}
 --- {{ join_algorithm }} ---
-{% for block_size in range(1, 11) -%}
+{% for block_size in range(1, 11, 4) -%}
 t1 ALL INNER JOIN t2 | bs = {{ block_size }}
 1	1	4	5
 1	1	4	5
@@ -108,7 +108,6 @@ t1 ALL LEFT JOIN t2 | bs = {{ block_size }}
 2	2	val27	5
 3	3	val3	4
 t1 ALL RIGHT JOIN t2 | bs = {{ block_size }}
-{% if join_algorithm != 'grace_hash' -%}
 1	1	4	val11
 1	1	4	val12
 2	2	5	val22
@@ -161,7 +160,6 @@ t1 ALL RIGHT JOIN t2 | bs = {{ block_size }}
 2	2	5	val28
 2	2	5	val28
 3	3	4	val3
-{% endif -%}
 t1 ANY INNER JOIN t2 | bs = {{ block_size }}
 1	1	4	5
 2	2	5	5
@@ -177,7 +175,6 @@ t1 ANY LEFT JOIN t2 | bs = {{ block_size }}
 2	2	val27	5
 3	3	val3	4
 t1 ANY RIGHT JOIN t2 | bs = {{ block_size }}
-{% if join_algorithm != 'grace_hash' -%}
 1	1	4	val11
 1	1	4	val12
 2	2	5	val22
@@ -188,9 +185,7 @@ t1 ANY RIGHT JOIN t2 | bs = {{ block_size }}
 2	2	5	val27
 2	2	5	val28
 3	3	4	val3
-{% endif -%}
 t1 ALL FULL JOIN t2 | bs = {{ block_size }}
-{% if join_algorithm != 'grace_hash' -%}
 1	1	4	5
 1	1	4	5
 2	2	5	5
@@ -243,9 +238,7 @@ t1 ALL FULL JOIN t2 | bs = {{ block_size }}
 2	2	5	5
 2	2	5	5
 3	3	4	4
-{% endif -%}
 t1 ALL FULL JOIN USING t2 | bs = {{ block_size }}
-{% if join_algorithm != 'grace_hash' -%}
 1	4	5
 1	4	5
 2	5	5
@@ -298,7 +291,6 @@ t1 ALL FULL JOIN USING t2 | bs = {{ block_size }}
 2	5	5
 2	5	5
 3	4	4
-{% endif -%}
 t1 ALL INNER JOIN tn2 | bs = {{ block_size }}
 1	1	4	5
 1	1	4	5
@@ -315,7 +307,6 @@ t1 ALL LEFT JOIN tn2 | bs = {{ block_size }}
 2	\N	val27	0
 3	3	val3	4
 t1 ALL RIGHT JOIN tn2 | bs = {{ block_size }}
-{% if join_algorithm != 'grace_hash' -%}
 0	\N	0	val22
 0	\N	0	val23
 0	\N	0	val24
@@ -326,7 +317,6 @@ t1 ALL RIGHT JOIN tn2 | bs = {{ block_size }}
 1	1	4	val11
 1	1	4	val12
 3	3	4	val3
-{% endif -%}
 t1 ANY INNER JOIN tn2 | bs = {{ block_size }}
 1	1	4	5
 3	3	4	4
@@ -341,7 +331,6 @@ t1 ANY LEFT JOIN tn2 | bs = {{ block_size }}
 2	\N	val27	0
 3	3	val3	4
 t1 ANY RIGHT JOIN tn2 | bs = {{ block_size }}
-{% if join_algorithm != 'grace_hash' -%}
 0	\N	0	val22
 0	\N	0	val23
 0	\N	0	val24
@@ -352,9 +341,7 @@ t1 ANY RIGHT JOIN tn2 | bs = {{ block_size }}
 1	1	4	val11
 1	1	4	val12
 3	3	4	val3
-{% endif -%}
 t1 ALL FULL JOIN tn2 | bs = {{ block_size }}
-{% if join_algorithm != 'grace_hash' -%}
 0	\N	0	5
 0	\N	0	5
 0	\N	0	5
@@ -372,9 +359,8 @@ t1 ALL FULL JOIN tn2 | bs = {{ block_size }}
 2	\N	5	0
 2	\N	5	0
 3	3	4	4
-{% endif -%}
-t1 ALL FULL JOIN USING tn2 | bs = {{ block_size }}
 {% if join_algorithm != 'grace_hash' -%}
+t1 ALL FULL JOIN USING tn2 | bs = {{ block_size }}
 1	4	5
 1	4	5
 2	5	0
@@ -409,7 +395,6 @@ tn1 ALL LEFT JOIN t2 | bs = {{ block_size }}
 \N	0	val26	0
 \N	0	val27	0
 tn1 ALL RIGHT JOIN t2 | bs = {{ block_size }}
-{% if join_algorithm != 'grace_hash' -%}
 1	1	4	val11
 1	1	4	val12
 3	3	4	val3
@@ -420,7 +405,6 @@ tn1 ALL RIGHT JOIN t2 | bs = {{ block_size }}
 \N	2	0	val26
 \N	2	0	val27
 \N	2	0	val28
-{% endif -%}
 tn1 ANY INNER JOIN t2 | bs = {{ block_size }}
 1	1	4	5
 3	3	4	4
@@ -435,7 +419,6 @@ tn1 ANY LEFT JOIN t2 | bs = {{ block_size }}
 \N	0	val26	0
 \N	0	val27	0
 tn1 ANY RIGHT JOIN t2 | bs = {{ block_size }}
-{% if join_algorithm != 'grace_hash' -%}
 1	1	4	val11
 1	1	4	val12
 3	3	4	val3
@@ -446,9 +429,7 @@ tn1 ANY RIGHT JOIN t2 | bs = {{ block_size }}
 \N	2	0	val26
 \N	2	0	val27
 \N	2	0	val28
-{% endif -%}
 tn1 ALL FULL JOIN t2 | bs = {{ block_size }}
-{% if join_algorithm != 'grace_hash' -%}
 1	1	4	5
 1	1	4	5
 3	3	4	4
@@ -466,9 +447,7 @@ tn1 ALL FULL JOIN t2 | bs = {{ block_size }}
 \N	2	0	5
 \N	2	0	5
 \N	2	0	5
-{% endif -%}
 tn1 ALL FULL JOIN USING t2 | bs = {{ block_size }}
-{% if join_algorithm != 'grace_hash' -%}
 1	4	5
 1	4	5
 2	0	5
@@ -486,7 +465,6 @@ tn1 ALL FULL JOIN USING t2 | bs = {{ block_size }}
 \N	5	0
 \N	5	0
 \N	5	0
-{% endif -%}
 tn1 ALL INNER JOIN tn2 | bs = {{ block_size }}
 1	1	4	5
 1	1	4	5
@@ -503,7 +481,6 @@ tn1 ALL LEFT JOIN tn2 | bs = {{ block_size }}
 \N	\N	val26	0
 \N	\N	val27	0
 tn1 ALL RIGHT JOIN tn2 | bs = {{ block_size }}
-{% if join_algorithm != 'grace_hash' -%}
 1	1	4	val11
 1	1	4	val12
 3	3	4	val3
@@ -514,7 +491,6 @@ tn1 ALL RIGHT JOIN tn2 | bs = {{ block_size }}
 \N	\N	0	val26
 \N	\N	0	val27
 \N	\N	0	val28
-{% endif -%}
 tn1 ANY INNER JOIN tn2 | bs = {{ block_size }}
 1	1	4	5
 3	3	4	4
@@ -529,7 +505,6 @@ tn1 ANY LEFT JOIN tn2 | bs = {{ block_size }}
 \N	\N	val26	0
 \N	\N	val27	0
 tn1 ANY RIGHT JOIN tn2 | bs = {{ block_size }}
-{% if join_algorithm != 'grace_hash' -%}
 1	1	4	val11
 1	1	4	val12
 3	3	4	val3
@@ -540,9 +515,7 @@ tn1 ANY RIGHT JOIN tn2 | bs = {{ block_size }}
 \N	\N	0	val26
 \N	\N	0	val27
 \N	\N	0	val28
-{% endif -%}
 tn1 ALL FULL JOIN tn2 | bs = {{ block_size }}
-{% if join_algorithm != 'grace_hash' -%}
 1	1	4	5
 1	1	4	5
 3	3	4	4
@@ -560,9 +533,8 @@ tn1 ALL FULL JOIN tn2 | bs = {{ block_size }}
 \N	\N	5	0
 \N	\N	5	0
 \N	\N	5	0
-{% endif -%}
-tn1 ALL FULL JOIN USING tn2 | bs = {{ block_size }}
 {% if join_algorithm != 'grace_hash' -%}
+tn1 ALL FULL JOIN USING tn2 | bs = {{ block_size }}
 1	4	5
 1	4	5
 3	4	4
diff --git a/tests/queries/0_stateless/02274_full_sort_join_nodistinct.sql.j2 b/tests/queries/0_stateless/02274_full_sort_join_nodistinct.sql.j2
index 613da65421e..f8eb4b1a53e 100644
--- a/tests/queries/0_stateless/02274_full_sort_join_nodistinct.sql.j2
+++ b/tests/queries/0_stateless/02274_full_sort_join_nodistinct.sql.j2
@@ -16,7 +16,6 @@ INSERT INTO t2 VALUES (1, 'val11'), (1, 'val12'), (2, 'val22'), (2, 'val23'), (2
 INSERT INTO tn2 VALUES (1, 'val11'), (1, 'val12'), (NULL, 'val22'), (NULL, 'val23'), (NULL, 'val24'), (NULL, 'val25'), (NULL, 'val26'), (NULL, 'val27'), (NULL, 'val28'), (3, 'val3');
 
 {% macro is_implemented(join_algorithm) -%}
-{% if join_algorithm == 'grace_hash' %} -- { serverError NOT_IMPLEMENTED } {% endif %}
 {% endmacro -%}
 
 {% for join_algorithm in ['full_sorting_merge', 'grace_hash'] -%}
@@ -27,7 +26,7 @@ SET join_algorithm = '{{ join_algorithm }}';
 
 SELECT '--- {{ join_algorithm }} ---';
 
-{% for block_size in range(1, 11) -%}
+{% for block_size in range(1, 11, 4) -%}
 SET max_block_size = {{ block_size }};
 
 {% for t1, t2 in [('t1', 't2'), ('t1', 'tn2'), ('tn1', 't2'), ('tn1', 'tn2')]  -%}
@@ -47,9 +46,10 @@ SELECT t1.key, t2.key, length(t1.s), t2.s FROM {{ t1 }} AS t1 {{ kind }} RIGHT J
 SELECT '{{ t1 }} ALL FULL JOIN {{ t2 }} | bs = {{ block_size }}';
 SELECT t1.key, t2.key, length(t1.s), length(t2.s) FROM {{ t1 }} AS t1 {{ kind }} FULL JOIN {{ t2 }} AS t2 ON t1.key == t2.key ORDER BY t1.key, t2.key, length(t1.s), length(t2.s); {{ is_implemented(join_algorithm) }}
 
+{% if join_algorithm == 'full_sorting_merge' or t2 != 'tn2' -%}
 SELECT '{{ t1 }} ALL FULL JOIN USING {{ t2 }} | bs = {{ block_size }}';
 SELECT key, length(t1.s), length(t2.s) FROM {{ t1 }} AS t1 ALL FULL JOIN {{ t2 }} AS t2 USING (key) ORDER BY key, length(t1.s), length(t2.s); {{ is_implemented(join_algorithm) }}
-
+{% endif -%}
 {% endfor -%}
 {% endfor -%}
 SET max_bytes_in_join = 0;
diff --git a/tests/queries/0_stateless/02275_full_sort_join_long.reference b/tests/queries/0_stateless/02275_full_sort_join_long.reference
index 9ec06aea3e6..73482358d12 100644
--- a/tests/queries/0_stateless/02275_full_sort_join_long.reference
+++ b/tests/queries/0_stateless/02275_full_sort_join_long.reference
@@ -41,16 +41,34 @@ ALL INNER
 ALL LEFT
 50195752660639	500353531835	10369589	10369589	1000342
 ALL RIGHT
-skipped
+500353531835	684008812186	1367170	1000342	1367170
 ALL INNER
 500353531835	500353531835	1000342	1000342	1000342
 ALL LEFT
 50195752660639	500353531835	10369589	10369589	1000342
 ALL RIGHT
-skipped
+500353531835	684008812186	1367170	1000342	1367170
 ALL INNER
 500353531835	500353531835	1000342	1000342	1000342
 ALL LEFT
 50195752660639	500353531835	10369589	10369589	1000342
 ALL RIGHT
-skipped
+500353531835	684008812186	1367170	1000342	1367170
+ANY INNER
+199622811843	199622811843	399458	399458	399458
+ANY LEFT
+50010619420459	315220291655	10000000	10000000	630753
+ANY RIGHT
+316611844056	500267124407	1000000	633172	1000000
+ANY INNER
+199622811843	199622811843	399458	399458	399458
+ANY LEFT
+50010619420459	315220291655	10000000	10000000	630753
+ANY RIGHT
+316611844056	500267124407	1000000	633172	1000000
+ANY INNER
+199622811843	199622811843	399458	399458	399458
+ANY LEFT
+50010619420459	315220291655	10000000	10000000	630753
+ANY RIGHT
+316611844056	500267124407	1000000	633172	1000000
diff --git a/tests/queries/0_stateless/02275_full_sort_join_long.sql.j2 b/tests/queries/0_stateless/02275_full_sort_join_long.sql.j2
index 7276e77dc16..621352f9c25 100644
--- a/tests/queries/0_stateless/02275_full_sort_join_long.sql.j2
+++ b/tests/queries/0_stateless/02275_full_sort_join_long.sql.j2
@@ -22,11 +22,6 @@ INSERT INTO t2
     FROM numbers_mt({{ rtable_size }})
 ;
 
-{% macro is_implemented(join_algorithm) -%}
-{% if join_algorithm == 'grace_hash' %} -- { serverError NOT_IMPLEMENTED }
-SELECT 'skipped';
-{% endif -%}
-{% endmacro -%}
 
 {% for join_algorithm in ['full_sorting_merge', 'grace_hash'] -%}
 
@@ -40,7 +35,6 @@ SET join_algorithm = '{{ join_algorithm }}';
 
 SET max_block_size = {{ block_size }};
 
-{% if not (kind == 'ANY' and join_algorithm == 'grace_hash') -%}
 
 SELECT '{{ kind }} INNER';
 SELECT sum(t1.key), sum(t2.key), count(), countIf(t1.key != 0), countIf(t2.key != 0) FROM t1
@@ -58,9 +52,8 @@ SELECT '{{ kind }} RIGHT';
 SELECT sum(t1.key), sum(t2.key), count(), countIf(t1.key != 0), countIf(t2.key != 0) FROM t1
 {{ kind }} RIGHT JOIN t2
 ON t1.key == t2.key
-; {{ is_implemented(join_algorithm) }}
+;
 
-{% endif -%}
 
 {% endfor -%}
 {% endfor -%}

From 91dc6a35e17417a44de46d76c0f0214911615244 Mon Sep 17 00:00:00 2001
From: lgbo-ustc <lgbo.ustc@gmail.com>
Date: Wed, 19 Jul 2023 09:18:16 +0800
Subject: [PATCH 131/242] update

---
 src/Interpreters/GraceHashJoin.cpp            |  1 -
 .../Transforms/JoiningTransform.cpp           | 19 +++++--------------
 src/Processors/Transforms/JoiningTransform.h  | 14 ++++----------
 src/QueryPipeline/QueryPipelineBuilder.cpp    |  5 ++++-
 4 files changed, 13 insertions(+), 26 deletions(-)

diff --git a/src/Interpreters/GraceHashJoin.cpp b/src/Interpreters/GraceHashJoin.cpp
index f94453293f6..5d72cf20740 100644
--- a/src/Interpreters/GraceHashJoin.cpp
+++ b/src/Interpreters/GraceHashJoin.cpp
@@ -301,7 +301,6 @@ void GraceHashJoin::initBuckets()
 
 bool GraceHashJoin::isSupported(const std::shared_ptr<TableJoin> & table_join)
 {
-
     bool is_asof = (table_join->strictness() == JoinStrictness::Asof);
     auto kind = table_join->kind();
     return !is_asof && (isInner(kind) || isLeft(kind) || isRight(kind) || isFull(kind)) && table_join->oneDisjunct();
diff --git a/src/Processors/Transforms/JoiningTransform.cpp b/src/Processors/Transforms/JoiningTransform.cpp
index f1ceefbf229..5480fea27a4 100644
--- a/src/Processors/Transforms/JoiningTransform.cpp
+++ b/src/Processors/Transforms/JoiningTransform.cpp
@@ -311,15 +311,10 @@ void FillingRightJoinSideTransform::work()
 
 
 DelayedJoinedBlocksWorkerTransform::DelayedJoinedBlocksWorkerTransform(
-    Block left_header_,
     Block output_header_,
-    size_t max_block_size_,
-    JoinPtr join_)
+    NonJoinedStreamBuilder non_joined_stream_builder_)
     : IProcessor(InputPorts{Block()}, OutputPorts{output_header_})
-    , left_header(left_header_)
-    , output_header(output_header_)
-    , max_block_size(max_block_size_)
-    , join(join_)
+    , non_joined_stream_builder(std::move(non_joined_stream_builder_))
 {
 }
 
@@ -396,15 +391,12 @@ void DelayedJoinedBlocksWorkerTransform::work()
         return;
 
     Block block;
-    if (!left_delayed_stream_finished)
+    /// Once all joined and non-joined rows from the left stream are emitted, only right non-joined rows are left
+    if (!task->delayed_blocks->isFinished())
     {
         block = task->delayed_blocks->next();
-
         if (!block)
-        {
-            left_delayed_stream_finished = true;
             block = nextNonJoinedBlock();
-        }
     }
     else
     {
@@ -424,7 +416,6 @@ void DelayedJoinedBlocksWorkerTransform::work()
 void DelayedJoinedBlocksWorkerTransform::resetTask()
 {
     task.reset();
-    left_delayed_stream_finished = false;
     non_joined_delayed_stream = nullptr;
 }
 
@@ -436,7 +427,7 @@ Block DelayedJoinedBlocksWorkerTransform::nextNonJoinedBlock()
     // To make only one processor could read from non-joined stream seems be a easy way.
     if (!non_joined_delayed_stream && task && task->left_delayed_stream_finish_counter->isLast())
     {
-        non_joined_delayed_stream = join->getNonJoinedBlocks(left_header, output_header, max_block_size);
+        non_joined_delayed_stream = non_joined_stream_builder();
     }
 
     if (non_joined_delayed_stream)
diff --git a/src/Processors/Transforms/JoiningTransform.h b/src/Processors/Transforms/JoiningTransform.h
index 10b413ed4e5..5e7403dbbdb 100644
--- a/src/Processors/Transforms/JoiningTransform.h
+++ b/src/Processors/Transforms/JoiningTransform.h
@@ -152,11 +152,10 @@ private:
 class DelayedJoinedBlocksWorkerTransform : public IProcessor
 {
 public:
+    using NonJoinedStreamBuilder = std::function<IBlocksStreamPtr()>;
     explicit DelayedJoinedBlocksWorkerTransform(
-        Block left_header_,
         Block output_header_,
-        size_t max_block_size_,
-        JoinPtr join_);
+        NonJoinedStreamBuilder non_joined_stream_builder_);
 
     String getName() const override { return "DelayedJoinedBlocksWorkerTransform"; }
 
@@ -164,15 +163,10 @@ public:
     void work() override;
 
 private:
-    Block left_header;
-    Block output_header;
-    size_t max_block_size;
-    JoinPtr join;
     DelayedBlocksTaskPtr task;
     Chunk output_chunk;
-
-    /// All joined and non-joined rows from left stream are emitted, only right non-joined rows are left
-    bool left_delayed_stream_finished = false;
+    /// For building a block stream to access the non-joined rows.
+    NonJoinedStreamBuilder non_joined_stream_builder;
     IBlocksStreamPtr non_joined_delayed_stream = nullptr;
 
     void resetTask();
diff --git a/src/QueryPipeline/QueryPipelineBuilder.cpp b/src/QueryPipeline/QueryPipelineBuilder.cpp
index ba98d725532..553b18dd57b 100644
--- a/src/QueryPipeline/QueryPipelineBuilder.cpp
+++ b/src/QueryPipeline/QueryPipelineBuilder.cpp
@@ -491,7 +491,10 @@ std::unique_ptr<QueryPipelineBuilder> QueryPipelineBuilder::joinPipelinesRightLe
         if (delayed_root)
         {
             // Process delayed joined blocks when all JoiningTransform are finished.
-            auto delayed = std::make_shared<DelayedJoinedBlocksWorkerTransform>(left_header, joined_header, max_block_size, join);
+            auto delayed = std::make_shared<DelayedJoinedBlocksWorkerTransform>(
+                joined_header,
+                [left_header, joined_header, max_block_size, join]()
+                { return join->getNonJoinedBlocks(left_header, joined_header, max_block_size); });
             if (delayed->getInputs().size() != 1 || delayed->getOutputs().size() != 1)
                 throw Exception(ErrorCodes::LOGICAL_ERROR, "DelayedJoinedBlocksWorkerTransform should have one input and one output");
 

From 84f6a7336c2d7ac547ad7030c389d4961f4ab8e4 Mon Sep 17 00:00:00 2001
From: chen768959 <934103231@qq.com>
Date: Thu, 20 Jul 2023 19:03:42 +0800
Subject: [PATCH 132/242] Prevent going beyond the index of
 const_columns_to_remove.

---
 src/Processors/Transforms/FinishSortingTransform.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/Processors/Transforms/FinishSortingTransform.cpp b/src/Processors/Transforms/FinishSortingTransform.cpp
index 744d035d0ee..baf898481ab 100644
--- a/src/Processors/Transforms/FinishSortingTransform.cpp
+++ b/src/Processors/Transforms/FinishSortingTransform.cpp
@@ -38,11 +38,12 @@ FinishSortingTransform::FinishSortingTransform(
     /// Remove constants from description_sorted_.
     SortDescription description_sorted_without_constants;
     description_sorted_without_constants.reserve(description_sorted_.size());
+    size_t num_columns = const_columns_to_remove.size();
     for (const auto & column_description : description_sorted_)
     {
         auto pos = header.getPositionByName(column_description.column_name);
 
-        if (!const_columns_to_remove[pos]){
+        if (pos < num_columns && !const_columns_to_remove[pos]){
             description_sorted_without_constants.push_back(column_description);
         }
     }

From f2d184cf1b002d18be152880ee2d82e57fed3b26 Mon Sep 17 00:00:00 2001
From: chen768959 <934103231@qq.com>
Date: Thu, 20 Jul 2023 19:11:08 +0800
Subject: [PATCH 133/242] Consistent style for if statements.

---
 src/Processors/Transforms/FinishSortingTransform.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/Processors/Transforms/FinishSortingTransform.cpp b/src/Processors/Transforms/FinishSortingTransform.cpp
index baf898481ab..63a9c3924a2 100644
--- a/src/Processors/Transforms/FinishSortingTransform.cpp
+++ b/src/Processors/Transforms/FinishSortingTransform.cpp
@@ -43,9 +43,8 @@ FinishSortingTransform::FinishSortingTransform(
     {
         auto pos = header.getPositionByName(column_description.column_name);
 
-        if (pos < num_columns && !const_columns_to_remove[pos]){
+        if (pos < num_columns && !const_columns_to_remove[pos])
             description_sorted_without_constants.push_back(column_description);
-        }
     }
     /// The target description is modified in SortingTransform constructor.
     /// To avoid doing the same actions with description_sorted just copy it from prefix of target description.

From d16d4449432999cdee3393b1f47b4a7d7c5314a6 Mon Sep 17 00:00:00 2001
From: Val Doroshchuk <valbok@gmail.com>
Date: Thu, 20 Jul 2023 12:24:52 +0200
Subject: [PATCH 134/242] MaterializedMySQL: Add support of double quoted
 comments

---
 src/Parsers/ExpressionElementParsers.cpp      | 33 +++++++++++++++++
 src/Parsers/ExpressionElementParsers.h        | 15 ++++++++
 src/Parsers/MySQL/ASTDeclareColumn.cpp        |  2 +-
 .../materialized_with_ddl.py                  | 35 +++++++++++++++++++
 .../test_materialized_mysql_database/test.py  |  6 ++++
 5 files changed, 90 insertions(+), 1 deletion(-)

diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp
index 3a7e8790bb4..0149526da79 100644
--- a/src/Parsers/ExpressionElementParsers.cpp
+++ b/src/Parsers/ExpressionElementParsers.cpp
@@ -1900,6 +1900,39 @@ bool ParserSubstitution::parseImpl(Pos & pos, ASTPtr & node, Expected & expected
 }
 
 
+bool ParserMySQLComment::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
+{
+    /// A comment is either a 'string literal' token or a "double quoted" (QuotedIdentifier) token.
+    const bool is_string_literal = pos->type == TokenType::StringLiteral;
+    if (!is_string_literal && pos->type != TokenType::QuotedIdentifier)
+        return false;
+    auto fail = [&] { expected.add(pos, "string literal or double quoted string"); return false; };
+
+    String text;
+    ReadBufferFromMemory buf(pos->begin, pos->size());
+    try
+    {
+        if (is_string_literal)
+            readQuotedStringWithSQLStyle(text, buf);
+        else
+            readDoubleQuotedStringWithSQLStyle(text, buf);
+    }
+    catch (const Exception &)
+    {
+        return fail();
+    }
+    /// Reject the token unless buf.count() equals its size (presumably: the whole token was consumed).
+    if (buf.count() != pos->size())
+        return fail();
+
+    auto literal = std::make_shared<ASTLiteral>(text);
+    literal->begin = pos;
+    literal->end = ++pos;
+    node = literal;
+    return true;
+}
+
+
 bool ParserMySQLGlobalVariable::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
 {
     if (pos->type != TokenType::DoubleAt)
diff --git a/src/Parsers/ExpressionElementParsers.h b/src/Parsers/ExpressionElementParsers.h
index cc88faf2653..f33f2d99f71 100644
--- a/src/Parsers/ExpressionElementParsers.h
+++ b/src/Parsers/ExpressionElementParsers.h
@@ -367,6 +367,21 @@ protected:
 };
 
 
+/** Value of a MySQL COMMENT clause: a single-quoted or a double-quoted string, e.g.
+  *  CREATE TABLE t (
+  *  i INT PRIMARY KEY,
+  *  first_name VARCHAR(255) COMMENT 'FIRST_NAME',
+  *  last_name VARCHAR(255) COMMENT "LAST_NAME"
+  *  )
+  */
+class ParserMySQLComment : public IParserBase
+{
+protected:
+    const char * getName() const override { return "MySQL comment parser"; }
+    bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
+};
+
+
 /** MySQL-style global variable: @@var
   */
 class ParserMySQLGlobalVariable : public IParserBase
diff --git a/src/Parsers/MySQL/ASTDeclareColumn.cpp b/src/Parsers/MySQL/ASTDeclareColumn.cpp
index e585dcb670c..e5f2b7870e2 100644
--- a/src/Parsers/MySQL/ASTDeclareColumn.cpp
+++ b/src/Parsers/MySQL/ASTDeclareColumn.cpp
@@ -50,7 +50,7 @@ static inline bool parseColumnDeclareOptions(IParser::Pos & pos, ASTPtr & node,
             OptionDescribe("PRIMARY KEY", "primary_key", std::make_unique<ParserAlwaysTrue>()),
             OptionDescribe("UNIQUE", "unique_key", std::make_unique<ParserAlwaysTrue>()),
             OptionDescribe("KEY", "primary_key", std::make_unique<ParserAlwaysTrue>()),
-            OptionDescribe("COMMENT", "comment", std::make_unique<ParserStringLiteral>()),
+            OptionDescribe("COMMENT", "comment", std::make_unique<ParserMySQLComment>()),
             OptionDescribe("CHARACTER SET", "charset_name", std::make_unique<ParserCharsetOrCollateName>()),
              OptionDescribe("CHARSET", "charset", std::make_unique<ParserCharsetOrCollateName>()),
             OptionDescribe("COLLATE", "collate", std::make_unique<ParserCharsetOrCollateName>()),
diff --git a/tests/integration/test_materialized_mysql_database/materialized_with_ddl.py b/tests/integration/test_materialized_mysql_database/materialized_with_ddl.py
index 8cf9e67bf63..f7a930ec00b 100644
--- a/tests/integration/test_materialized_mysql_database/materialized_with_ddl.py
+++ b/tests/integration/test_materialized_mysql_database/materialized_with_ddl.py
@@ -1617,6 +1617,41 @@ def materialized_with_column_comments_test(clickhouse_node, mysql_node, service_
     mysql_node.query("DROP DATABASE materialized_with_column_comments_test")
 
 
+def double_quoted_comment(clickhouse_node, mysql_node, service_name):
+    db = "comment_db"
+    mysql_node.query(f"DROP DATABASE IF EXISTS {db}")
+    clickhouse_node.query(f"DROP DATABASE IF EXISTS {db}")
+    mysql_node.query(f"CREATE DATABASE {db}")
+    mysql_node.query(
+        f'CREATE TABLE {db}.t1 (i INT PRIMARY KEY, id VARCHAR(255) COMMENT "ID")'
+    )
+    mysql_node.query(
+        f"CREATE TABLE {db}.t2 (i INT PRIMARY KEY, id VARCHAR(255) COMMENT 'ID')"
+    )
+    clickhouse_node.query(
+        f"CREATE DATABASE {db} ENGINE = MaterializedMySQL('{service_name}:3306', '{db}', 'root', 'clickhouse')"
+    )
+    check_query(
+        clickhouse_node,
+        f"SHOW TABLES FROM {db} FORMAT TSV",
+        "t1\nt2\n",
+    )
+
+    # incremental
+    mysql_node.query(
+        f'CREATE TABLE {db}.t3 (i INT PRIMARY KEY, id VARCHAR(255) COMMENT "ID")'
+    )
+    mysql_node.query(
+        f"CREATE TABLE {db}.t4 (i INT PRIMARY KEY, id VARCHAR(255) COMMENT 'ID')"
+    )
+    check_query(
+        clickhouse_node, f"SHOW TABLES FROM {db} FORMAT TSV", "t1\nt2\nt3\nt4\n"
+    )
+
+    clickhouse_node.query(f"DROP DATABASE IF EXISTS {db}")
+    mysql_node.query(f"DROP DATABASE IF EXISTS {db}")
+
+
 def materialized_with_enum8_test(clickhouse_node, mysql_node, service_name):
     mysql_node.query("DROP DATABASE IF EXISTS materialized_with_enum8_test")
     clickhouse_node.query("DROP DATABASE IF EXISTS materialized_with_enum8_test")
diff --git a/tests/integration/test_materialized_mysql_database/test.py b/tests/integration/test_materialized_mysql_database/test.py
index 21316d1a474..0166f7d1d33 100644
--- a/tests/integration/test_materialized_mysql_database/test.py
+++ b/tests/integration/test_materialized_mysql_database/test.py
@@ -416,6 +416,12 @@ def test_materialized_with_column_comments(
     )
 
 
+def test_double_quoted_comment(started_cluster, started_mysql_8_0, clickhouse_node):
+    materialized_with_ddl.double_quoted_comment(
+        clickhouse_node, started_mysql_8_0, "mysql80"
+    )
+
+
 def test_materialized_with_enum(
     started_cluster, started_mysql_8_0, started_mysql_5_7, clickhouse_node
 ):

From fe934d3059936cd203952cfe5881ff7243001ae9 Mon Sep 17 00:00:00 2001
From: avogar <avogar@clickhouse.com>
Date: Thu, 20 Jul 2023 12:38:41 +0000
Subject: [PATCH 135/242] Make better

---
 docs/en/engines/table-engines/special/url.md | 2 +-
 docs/en/operations/settings/settings.md      | 6 +++---
 docs/en/sql-reference/table-functions/url.md | 4 ++--
 src/Core/Settings.h                          | 2 +-
 src/Storages/StorageURL.cpp                  | 2 +-
 5 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/docs/en/engines/table-engines/special/url.md b/docs/en/engines/table-engines/special/url.md
index 9f2bf177c96..f556df0a088 100644
--- a/docs/en/engines/table-engines/special/url.md
+++ b/docs/en/engines/table-engines/special/url.md
@@ -106,4 +106,4 @@ For partitioning by month, use the `toYYYYMM(date_column)` expression, where `da
 ## Storage Settings {#storage-settings}
 
 - [engine_url_skip_empty_files](/docs/en/operations/settings/settings.md#engine_url_skip_empty_files) - allows to skip empty files while reading. Disabled by default.
-- [decode_and_encode_path_in_url](/docs/en/operations/settings/settings.md#decode_and_encode_path_in_url) - enables or disables decoding/encoding path in uri. Enabled by default.
+- [disable_url_encoding](/docs/en/operations/settings/settings.md#disable_url_encoding) - allows to disable decoding/encoding path in uri. Disabled by default.
diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md
index db5d1a2f5d9..d138b07d3ae 100644
--- a/docs/en/operations/settings/settings.md
+++ b/docs/en/operations/settings/settings.md
@@ -3466,11 +3466,11 @@ Possible values:
 
 Default value: `0`.
 
-## decode_and_encode_path_in_url {#decode_and_encode_path_in_url}
+## disable_url_encoding {#disable_url_encoding}
 
-Enables or disables decoding/encoding path in uri in [URL](../../engines/table-engines/special/url.md) engine tables.
+Allows to disable decoding/encoding path in uri in [URL](../../engines/table-engines/special/url.md) engine tables.
 
-Enabled by default.
+Disabled by default.
 
 ## database_atomic_wait_for_drop_and_detach_synchronously {#database_atomic_wait_for_drop_and_detach_synchronously}
 
diff --git a/docs/en/sql-reference/table-functions/url.md b/docs/en/sql-reference/table-functions/url.md
index 96f36f03949..677ed011960 100644
--- a/docs/en/sql-reference/table-functions/url.md
+++ b/docs/en/sql-reference/table-functions/url.md
@@ -56,8 +56,8 @@ Character `|` inside patterns is used to specify failover addresses. They are it
 ## Storage Settings {#storage-settings}
 
 - [engine_url_skip_empty_files](/docs/en/operations/settings/settings.md#engine_url_skip_empty_files) - allows to skip empty files while reading. Disabled by default.
-- [decode_and_encode_path_in_url](/docs/en/operations/settings/settings.md#decode_and_encode_path_in_url) - enables or disables decoding/encoding path in uri. Enabled by default.
+- [disable_url_encoding](/docs/en/operations/settings/settings.md#disable_url_encoding) - allows to disable decoding/encoding path in uri. Disabled by default.
 
-- **See Also**
+**See Also**
 
 - [Virtual columns](/docs/en/engines/table-engines/index.md#table_engines-virtual_columns)
diff --git a/src/Core/Settings.h b/src/Core/Settings.h
index ffa72d841be..5dc40494115 100644
--- a/src/Core/Settings.h
+++ b/src/Core/Settings.h
@@ -621,7 +621,7 @@ class IColumn;
     M(Bool, engine_file_allow_create_multiple_files, false, "Enables or disables creating a new file on each insert in file engine tables if format has suffix.", 0) \
     M(Bool, engine_file_skip_empty_files, false, "Allows to skip empty files in file table engine", 0) \
     M(Bool, engine_url_skip_empty_files, false, "Allows to skip empty files in url table engine", 0) \
-    M(Bool, decode_and_encode_path_in_url, true, "Enables or disables decoding/encoding path in uri in URL table engine", 0) \
+    M(Bool, disable_url_encoding, false, "Allows to disable decoding/encoding path in uri in URL table engine", 0) \
     M(Bool, allow_experimental_database_replicated, false, "Allow to create databases with Replicated engine", 0) \
     M(UInt64, database_replicated_initial_query_timeout_sec, 300, "How long initial DDL query should wait for Replicated database to precess previous DDL queue entries", 0) \
     M(Bool, database_replicated_enforce_synchronous_settings, false, "Enforces synchronous waiting for some queries (see also database_atomic_wait_for_drop_and_detach_synchronously, mutation_sync, alter_sync). Not recommended to enable these settings.", 0) \
diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp
index 4cfefbc5527..0c915f54cff 100644
--- a/src/Storages/StorageURL.cpp
+++ b/src/Storages/StorageURL.cpp
@@ -389,7 +389,7 @@ std::pair<Poco::URI, std::unique_ptr<ReadWriteBufferFromHTTP>> StorageURLSource:
     for (; option != end; ++option)
     {
         bool skip_url_not_found_error = glob_url && read_settings.http_skip_not_found_url_for_globs && option == std::prev(end);
-        auto request_uri = Poco::URI(*option, context->getSettingsRef().decode_and_encode_path_in_url);
+        auto request_uri = Poco::URI(*option, context->getSettingsRef().disable_url_encoding);
 
         for (const auto & [param, value] : params)
             request_uri.addQueryParameter(param, value);

From f6a44f8eedce98bd50ceee72e5fdc4da1a82a43a Mon Sep 17 00:00:00 2001
From: avogar <avogar@clickhouse.com>
Date: Thu, 20 Jul 2023 12:40:41 +0000
Subject: [PATCH 136/242] Better

---
 base/poco/Foundation/include/Poco/URI.h |  6 +++--
 base/poco/Foundation/src/URI.cpp        | 34 +++++++++++++------------
 2 files changed, 22 insertions(+), 18 deletions(-)

diff --git a/base/poco/Foundation/include/Poco/URI.h b/base/poco/Foundation/include/Poco/URI.h
index 5e6e7efd938..f4505147ced 100644
--- a/base/poco/Foundation/include/Poco/URI.h
+++ b/base/poco/Foundation/include/Poco/URI.h
@@ -57,7 +57,7 @@ public:
     URI();
     /// Creates an empty URI.
 
-    explicit URI(const std::string & uri, bool decode_and_encode_path = true);
+    explicit URI(const std::string & uri, bool disable_url_encoding = false);
     /// Parses an URI from the given string. Throws a
     /// SyntaxException if the uri is not valid.
 
@@ -351,6 +351,8 @@ protected:
 
 private:
     void encodePath(std::string & encodedStr) const;
+    void decodePath(const std::string & encodedStr);
+
 
     std::string _scheme;
     std::string _userInfo;
@@ -360,7 +362,7 @@ private:
     std::string _query;
     std::string _fragment;
 
-    bool _decode_and_encode_path = true;
+    bool _disable_url_encoding = false; /// If true, the path is stored and emitted as is (no decode/encode).
 };
 
 
diff --git a/base/poco/Foundation/src/URI.cpp b/base/poco/Foundation/src/URI.cpp
index 9bad1b39a87..3354c69d188 100644
--- a/base/poco/Foundation/src/URI.cpp
+++ b/base/poco/Foundation/src/URI.cpp
@@ -37,7 +37,7 @@ URI::URI():
 
 
 URI::URI(const std::string& uri, bool decode_and_encode_path):
-	_port(0), _decode_and_encode_path(decode_and_encode_path)
+	_port(0), _disable_url_encoding(decode_and_encode_path)
 {
 	parse(uri);
 }
@@ -108,7 +108,7 @@ URI::URI(const URI& uri):
 	_path(uri._path),
 	_query(uri._query),
 	_fragment(uri._fragment),
-    _decode_and_encode_path(uri._decode_and_encode_path)
+    _disable_url_encoding(uri._disable_url_encoding)
 {
 }
 
@@ -121,7 +121,7 @@ URI::URI(const URI& baseURI, const std::string& relativeURI):
 	_path(baseURI._path),
 	_query(baseURI._query),
 	_fragment(baseURI._fragment),
-    _decode_and_encode_path(baseURI._decode_and_encode_path)
+    _disable_url_encoding(baseURI._disable_url_encoding)
 {
 	resolve(relativeURI);
 }
@@ -153,7 +153,7 @@ URI& URI::operator = (const URI& uri)
 		_path     = uri._path;
 		_query    = uri._query;
 		_fragment = uri._fragment;
-        _decode_and_encode_path = uri._decode_and_encode_path;
+        _disable_url_encoding = uri._disable_url_encoding;
 	}
 	return *this;
 }
@@ -184,7 +184,7 @@ void URI::swap(URI& uri)
 	std::swap(_path, uri._path);
 	std::swap(_query, uri._query);
 	std::swap(_fragment, uri._fragment);
-    std::swap(_decode_and_encode_path, uri._decode_and_encode_path);
+    std::swap(_disable_url_encoding, uri._disable_url_encoding);
 }
 
 
@@ -317,10 +317,7 @@ void URI::setAuthority(const std::string& authority)
 void URI::setPath(const std::string& path)
 {
 	_path.clear();
-    if (_decode_and_encode_path)
-	    decode(path, _path);
-    else
-        _path = path;
+    decodePath(path);
 }
 
 	
@@ -690,10 +687,18 @@ void URI::decode(const std::string& str, std::string& decodedStr, bool plusAsSpa
 
 void URI::encodePath(std::string & encodedStr) const
 {
-    if (_decode_and_encode_path)
-        encode(_path, RESERVED_PATH, encodedStr);
-    else
+    if (_disable_url_encoding)
         encodedStr = _path;
+    else
+        encode(_path, RESERVED_PATH, encodedStr);
+}
+
+void URI::decodePath(const std::string & encodedStr)
+{
+    if (_disable_url_encoding)
+        _path = encodedStr;
+    else
+        decode(encodedStr, _path);
 }
 
 bool URI::isWellKnownPort() const
@@ -834,10 +839,7 @@ void URI::parsePath(std::string::const_iterator& it, const std::string::const_it
 {
 	std::string path;
 	while (it != end && *it != '?' && *it != '#') path += *it++;
-    if (_decode_and_encode_path)
-	    decode(path, _path);
-    else
-        _path = path;
+    decodePath(path);
 }
 
 
From 97e54d6ebaa174f8d2ae291ddec20fd879b29bfa Mon Sep 17 00:00:00 2001
From: Michael Kolupaev <michael.kolupaev@clickhouse.com>
Date: Wed, 19 Jul 2023 07:13:25 +0000
Subject: [PATCH 137/242] Fix test_backup_restore_on_cluster flakiness caused
 by missing replica syncs

---
 tests/integration/test_backup_restore_on_cluster/test.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tests/integration/test_backup_restore_on_cluster/test.py b/tests/integration/test_backup_restore_on_cluster/test.py
index 6af3a7dbab8..39496b8a5c8 100644
--- a/tests/integration/test_backup_restore_on_cluster/test.py
+++ b/tests/integration/test_backup_restore_on_cluster/test.py
@@ -580,6 +580,7 @@ def test_required_privileges():
     node1.query(
         f"RESTORE TABLE tbl AS tbl2 ON CLUSTER 'cluster' FROM {backup_name}", user="u1"
     )
+    node2.query("SYSTEM SYNC REPLICA ON CLUSTER 'cluster' tbl2")
 
     assert node2.query("SELECT * FROM tbl2") == "100\n"
 
@@ -593,6 +594,7 @@ def test_required_privileges():
 
     node1.query("GRANT INSERT, CREATE TABLE ON tbl TO u1")
     node1.query(f"RESTORE ALL ON CLUSTER 'cluster' FROM {backup_name}", user="u1")
+    node2.query("SYSTEM SYNC REPLICA ON CLUSTER 'cluster' tbl")
 
     assert node2.query("SELECT * FROM tbl") == "100\n"
 

From ed59870f92fa2893c9c105eaaeff82b1efaede22 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Krzysztof=20G=C3=B3ralski?=
 <krzysztof.goralski@clickhouse.com>
Date: Thu, 20 Jul 2023 18:04:58 +0200
Subject: [PATCH 138/242] Update LRUFileCachePriority.cpp

---
 src/Interpreters/Cache/LRUFileCachePriority.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/Interpreters/Cache/LRUFileCachePriority.cpp b/src/Interpreters/Cache/LRUFileCachePriority.cpp
index 18862e154da..33e567b7a76 100644
--- a/src/Interpreters/Cache/LRUFileCachePriority.cpp
+++ b/src/Interpreters/Cache/LRUFileCachePriority.cpp
@@ -7,6 +7,7 @@
 namespace CurrentMetrics
 {
     extern const Metric FilesystemCacheSize;
+    extern const Metric FilesystemCacheSizeLimit;
     extern const Metric FilesystemCacheElements;
 }
 
@@ -101,6 +102,7 @@ void LRUFileCachePriority::updateSize(int64_t size)
 {
     current_size += size;
     CurrentMetrics::add(CurrentMetrics::FilesystemCacheSize, size);
+    CurrentMetrics::set(CurrentMetrics::FilesystemCacheSizeLimit, getSizeLimit());
 }
 
 void LRUFileCachePriority::updateElementsCount(int64_t num)

From b3c42a1171e3f631e8985b80fc3c822c7ac87dd1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Krzysztof=20G=C3=B3ralski?=
 <krzysztof.goralski@clickhouse.com>
Date: Thu, 20 Jul 2023 18:06:54 +0200
Subject: [PATCH 139/242] Update CurrentMetrics.cpp with
 FilesystemCacheSizeLimit metric

---
 src/Common/CurrentMetrics.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/Common/CurrentMetrics.cpp b/src/Common/CurrentMetrics.cpp
index 626b43aea2c..583b13cf79d 100644
--- a/src/Common/CurrentMetrics.cpp
+++ b/src/Common/CurrentMetrics.cpp
@@ -187,6 +187,7 @@
     M(CacheFileSegments, "Number of existing cache file segments") \
     M(CacheDetachedFileSegments, "Number of existing detached cache file segments") \
     M(FilesystemCacheSize, "Filesystem cache size in bytes") \
+    M(FilesystemCacheSizeLimit, "Filesystem cache size limit in bytes") \
     M(FilesystemCacheElements, "Filesystem cache elements (file segments)") \
     M(FilesystemCacheDownloadQueueElements, "Filesystem cache elements in download queue") \
     M(AsyncInsertCacheSize, "Number of async insert hash id in cache") \

From 24371c33bfd5037455cb025b057fb413ee1be396 Mon Sep 17 00:00:00 2001
From: lgbo-ustc <lgbo.ustc@gmail.com>
Date: Fri, 21 Jul 2023 09:24:16 +0800
Subject: [PATCH 140/242] remove DelayedBlocksTask::finish

---
 src/Processors/Transforms/JoiningTransform.cpp | 3 ++-
 src/Processors/Transforms/JoiningTransform.h   | 3 +--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/Processors/Transforms/JoiningTransform.cpp b/src/Processors/Transforms/JoiningTransform.cpp
index 5480fea27a4..4e7868ea1c2 100644
--- a/src/Processors/Transforms/JoiningTransform.cpp
+++ b/src/Processors/Transforms/JoiningTransform.cpp
@@ -375,7 +375,8 @@ IProcessor::Status DelayedJoinedBlocksWorkerTransform::prepare()
         input.setNotNeeded();
     }
 
-    if (task->finished)
+    // When delayed_blocks is nullptr, it means that all buckets have been joined.
+    if (!task->delayed_blocks)
     {
         input.close();
         output.finish();
diff --git a/src/Processors/Transforms/JoiningTransform.h b/src/Processors/Transforms/JoiningTransform.h
index 5e7403dbbdb..a308af03662 100644
--- a/src/Processors/Transforms/JoiningTransform.h
+++ b/src/Processors/Transforms/JoiningTransform.h
@@ -115,7 +115,7 @@ class DelayedBlocksTask : public ChunkInfo
 {
 public:
 
-    explicit DelayedBlocksTask() : finished(true) {}
+    DelayedBlocksTask() = default;
     explicit DelayedBlocksTask(IBlocksStreamPtr delayed_blocks_, JoiningTransform::FinishCounterPtr left_delayed_stream_finish_counter_)
         : delayed_blocks(std::move(delayed_blocks_))
         , left_delayed_stream_finish_counter(left_delayed_stream_finish_counter_)
@@ -125,7 +125,6 @@ public:
     IBlocksStreamPtr delayed_blocks = nullptr;
     JoiningTransform::FinishCounterPtr left_delayed_stream_finish_counter = nullptr;
 
-    bool finished = false;
 };
 
 using DelayedBlocksTaskPtr = std::shared_ptr<const DelayedBlocksTask>;

From f0e277f94a642647cfd3eb5ebc722b486d9203b0 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Fri, 21 Jul 2023 06:45:35 +0200
Subject: [PATCH 141/242] Rename TaskStatsInfoGetter into
 NetlinkMetricsProvider

There is ProcfsMetricsProvider, so by analogy to it.

Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com>
---
 src/Common/CurrentThread.cpp                  |  1 -
 ...oGetter.cpp => NetlinkMetricsProvider.cpp} | 22 +++++++++----------
 ...sInfoGetter.h => NetlinkMetricsProvider.h} |  6 ++---
 src/Common/ThreadProfileEvents.cpp            |  6 ++---
 src/Disks/IO/ThreadPoolReader.cpp             |  2 +-
 src/IO/ReadBufferFromFileDescriptor.cpp       |  2 +-
 src/IO/SynchronousReader.cpp                  |  2 +-
 7 files changed, 20 insertions(+), 21 deletions(-)
 rename src/Common/{TaskStatsInfoGetter.cpp => NetlinkMetricsProvider.cpp} (93%)
 rename src/Common/{TaskStatsInfoGetter.h => NetlinkMetricsProvider.h} (85%)

diff --git a/src/Common/CurrentThread.cpp b/src/Common/CurrentThread.cpp
index 057b1eeda12..ac5b712279e 100644
--- a/src/Common/CurrentThread.cpp
+++ b/src/Common/CurrentThread.cpp
@@ -3,7 +3,6 @@
 #include "CurrentThread.h"
 #include <Common/logger_useful.h>
 #include <Common/ThreadStatus.h>
-#include <Common/TaskStatsInfoGetter.h>
 #include <Interpreters/ProcessList.h>
 #include <Interpreters/Context.h>
 #include <base/getThreadId.h>
diff --git a/src/Common/TaskStatsInfoGetter.cpp b/src/Common/NetlinkMetricsProvider.cpp
similarity index 93%
rename from src/Common/TaskStatsInfoGetter.cpp
rename to src/Common/NetlinkMetricsProvider.cpp
index 867a50c8cce..4c228bcc6fc 100644
--- a/src/Common/TaskStatsInfoGetter.cpp
+++ b/src/Common/NetlinkMetricsProvider.cpp
@@ -1,4 +1,4 @@
-#include "TaskStatsInfoGetter.h"
+#include "NetlinkMetricsProvider.h"
 #include <Common/Exception.h>
 #include <base/defines.h>
 #include <base/types.h>
@@ -200,7 +200,7 @@ bool checkPermissionsImpl()
     if (!res)
         return false;
 
-    /// Check that we can successfully initialize TaskStatsInfoGetter.
+    /// Check that we can successfully initialize NetlinkMetricsProvider.
     /// It will ask about family id through Netlink.
     /// On some LXC containers we have capability but we still cannot use Netlink.
     /// There is an evidence that Linux fedora-riscv 6.1.22 gives something strange instead of the expected result.
@@ -208,7 +208,7 @@ bool checkPermissionsImpl()
     try
     {
         ::taskstats stats{};
-        TaskStatsInfoGetter().getStat(stats, static_cast<pid_t>(getThreadId()));
+        NetlinkMetricsProvider().getStat(stats, static_cast<pid_t>(getThreadId()));
     }
     catch (const Exception & e)
     {
@@ -244,14 +244,14 @@ UInt16 getFamilyId(int fd)
 }
 
 
-bool TaskStatsInfoGetter::checkPermissions()
+bool NetlinkMetricsProvider::checkPermissions()
 {
     static bool res = checkPermissionsImpl();
     return res;
 }
 
 
-TaskStatsInfoGetter::TaskStatsInfoGetter()
+NetlinkMetricsProvider::NetlinkMetricsProvider()
 {
     netlink_socket_fd = ::socket(PF_NETLINK, SOCK_RAW, NETLINK_GENERIC);
     if (netlink_socket_fd < 0)
@@ -293,7 +293,7 @@ TaskStatsInfoGetter::TaskStatsInfoGetter()
 }
 
 
-void TaskStatsInfoGetter::getStat(::taskstats & out_stats, pid_t tid) const
+void NetlinkMetricsProvider::getStat(::taskstats & out_stats, pid_t tid) const
 {
     NetlinkMessage answer = query(netlink_socket_fd, taskstats_family_id, tid, TASKSTATS_CMD_GET, TASKSTATS_CMD_ATTR_PID, &tid, sizeof(tid));
 
@@ -318,7 +318,7 @@ void TaskStatsInfoGetter::getStat(::taskstats & out_stats, pid_t tid) const
 }
 
 
-TaskStatsInfoGetter::~TaskStatsInfoGetter()
+NetlinkMetricsProvider::~NetlinkMetricsProvider()
 {
     if (netlink_socket_fd >= 0)
     {
@@ -335,15 +335,15 @@ TaskStatsInfoGetter::~TaskStatsInfoGetter()
 namespace DB
 {
 
-bool TaskStatsInfoGetter::checkPermissions()
+bool NetlinkMetricsProvider::checkPermissions()
 {
     return false;
 }
 
-TaskStatsInfoGetter::TaskStatsInfoGetter() = default;
-TaskStatsInfoGetter::~TaskStatsInfoGetter() = default;
+NetlinkMetricsProvider::NetlinkMetricsProvider() = default;
+NetlinkMetricsProvider::~NetlinkMetricsProvider() = default;
 
-void TaskStatsInfoGetter::getStat(::taskstats &, pid_t) const
+void NetlinkMetricsProvider::getStat(::taskstats &, pid_t) const
 {
 }
 
diff --git a/src/Common/TaskStatsInfoGetter.h b/src/Common/NetlinkMetricsProvider.h
similarity index 85%
rename from src/Common/TaskStatsInfoGetter.h
rename to src/Common/NetlinkMetricsProvider.h
index 66655d7ad0d..8a54f33be80 100644
--- a/src/Common/TaskStatsInfoGetter.h
+++ b/src/Common/NetlinkMetricsProvider.h
@@ -15,11 +15,11 @@ namespace DB
 ///
 ///   [1]: https://elixir.bootlin.com/linux/v5.18-rc4/source/kernel/tsacct.c#L101
 ///
-class TaskStatsInfoGetter : private boost::noncopyable
+class NetlinkMetricsProvider : private boost::noncopyable
 {
 public:
-    TaskStatsInfoGetter();
-    ~TaskStatsInfoGetter();
+    NetlinkMetricsProvider();
+    ~NetlinkMetricsProvider();
 
     void getStat(::taskstats & out_stats, pid_t tid) const;
 
diff --git a/src/Common/ThreadProfileEvents.cpp b/src/Common/ThreadProfileEvents.cpp
index a94fd81559a..256f53df011 100644
--- a/src/Common/ThreadProfileEvents.cpp
+++ b/src/Common/ThreadProfileEvents.cpp
@@ -2,7 +2,7 @@
 
 #if defined(OS_LINUX)
 
-#include "TaskStatsInfoGetter.h"
+#include "NetlinkMetricsProvider.h"
 #include "ProcfsMetricsProvider.h"
 #include "hasLinuxCapability.h"
 
@@ -99,7 +99,7 @@ TasksStatsCounters::MetricsProvider TasksStatsCounters::findBestAvailableProvide
     static std::optional<MetricsProvider> provider =
         []() -> MetricsProvider
         {
-            if (TaskStatsInfoGetter::checkPermissions())
+            if (NetlinkMetricsProvider::checkPermissions())
             {
                 return MetricsProvider::Netlink;
             }
@@ -119,7 +119,7 @@ TasksStatsCounters::TasksStatsCounters(const UInt64 tid, const MetricsProvider p
     switch (provider)
     {
     case MetricsProvider::Netlink:
-        stats_getter = [metrics_provider = std::make_shared<TaskStatsInfoGetter>(), tid]()
+        stats_getter = [metrics_provider = std::make_shared<NetlinkMetricsProvider>(), tid]()
                 {
                     ::taskstats result{};
                     metrics_provider->getStat(result, static_cast<pid_t>(tid));
diff --git a/src/Disks/IO/ThreadPoolReader.cpp b/src/Disks/IO/ThreadPoolReader.cpp
index effa19bc1af..cd3f2d8dea0 100644
--- a/src/Disks/IO/ThreadPoolReader.cpp
+++ b/src/Disks/IO/ThreadPoolReader.cpp
@@ -114,7 +114,7 @@ std::future<IAsynchronousReader::Result> ThreadPoolReader::submit(Request reques
         /// It reports real time spent including the time spent while thread was preempted doing nothing.
         /// And it is Ok for the purpose of this watch (it is used to lower the number of threads to read from tables).
         /// Sometimes it is better to use taskstats::blkio_delay_total, but it is quite expensive to get it
-        /// (TaskStatsInfoGetter has about 500K RPS).
+        /// (NetlinkMetricsProvider has about 500K RPS).
         Stopwatch watch(CLOCK_MONOTONIC);
 
         SCOPE_EXIT({
diff --git a/src/IO/ReadBufferFromFileDescriptor.cpp b/src/IO/ReadBufferFromFileDescriptor.cpp
index 67bc01279c3..6c0c1681a4c 100644
--- a/src/IO/ReadBufferFromFileDescriptor.cpp
+++ b/src/IO/ReadBufferFromFileDescriptor.cpp
@@ -95,7 +95,7 @@ size_t ReadBufferFromFileDescriptor::readImpl(char * to, size_t min_bytes, size_
         /// It reports real time spent including the time spent while thread was preempted doing nothing.
         /// And it is Ok for the purpose of this watch (it is used to lower the number of threads to read from tables).
         /// Sometimes it is better to use taskstats::blkio_delay_total, but it is quite expensive to get it
-        /// (TaskStatsInfoGetter has about 500K RPS).
+        /// (NetlinkMetricsProvider has about 500K RPS).
         watch.stop();
         ProfileEvents::increment(ProfileEvents::DiskReadElapsedMicroseconds, watch.elapsedMicroseconds());
 
diff --git a/src/IO/SynchronousReader.cpp b/src/IO/SynchronousReader.cpp
index 7cef3bd8963..e1c654e48a3 100644
--- a/src/IO/SynchronousReader.cpp
+++ b/src/IO/SynchronousReader.cpp
@@ -78,7 +78,7 @@ std::future<IAsynchronousReader::Result> SynchronousReader::submit(Request reque
         /// It reports real time spent including the time spent while thread was preempted doing nothing.
         /// And it is Ok for the purpose of this watch (it is used to lower the number of threads to read from tables).
         /// Sometimes it is better to use taskstats::blkio_delay_total, but it is quite expensive to get it
-        /// (TaskStatsInfoGetter has about 500K RPS).
+        /// (NetlinkMetricsProvider has about 500K RPS).
         watch.stop();
         ProfileEvents::increment(ProfileEvents::DiskReadElapsedMicroseconds, watch.elapsedMicroseconds());
 

From 51e81b37a4158e31b61c5ae8d993a2dee0ae16e3 Mon Sep 17 00:00:00 2001
From: Alexander Gololobov <440544+davenger@users.noreply.github.com>
Date: Fri, 7 Jul 2023 18:45:13 +0200
Subject: [PATCH 142/242] Move conditions with columns from PK to the end of
 PREWHERE chain

---
 .../MergeTree/MergeTreeWhereOptimizer.cpp     | 31 +++++++++++++++++++
 .../MergeTree/MergeTreeWhereOptimizer.h       |  8 ++++-
 2 files changed, 38 insertions(+), 1 deletion(-)

diff --git a/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp b/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp
index 1620ba98d58..4ff58c1fc86 100644
--- a/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp
+++ b/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp
@@ -22,6 +22,33 @@ namespace DB
 /// This is used to assume that condition is likely to have good selectivity.
 static constexpr auto threshold = 2;
 
+static NameToIndexMap fillNamesPositions(const Names & names)
+{
+    NameToIndexMap names_positions;
+
+    for (size_t position = 0; position < names.size(); ++position)
+    {
+        const auto & name = names[position];
+        names_positions[name] = position;
+    }
+
+    return names_positions;
+}
+
+/// Find minimal position of any of the column in primary key.
+static Int64 findMinPosition(const NameSet & condition_table_columns, const NameToIndexMap & primary_key_positions)
+{
+    Int64 min_position = std::numeric_limits<Int64>::max() - 1;
+
+    for (const auto & column : condition_table_columns)
+    {
+        auto it = primary_key_positions.find(column);
+        if (it != primary_key_positions.end())
+            min_position = std::min(min_position, static_cast<Int64>(it->second));
+    }
+
+    return min_position;
+}
 
 MergeTreeWhereOptimizer::MergeTreeWhereOptimizer(
     std::unordered_map<std::string, UInt64> column_sizes_,
@@ -35,6 +62,7 @@ MergeTreeWhereOptimizer::MergeTreeWhereOptimizer(
     , supported_columns{supported_columns_}
     , sorting_key_names{NameSet(
           metadata_snapshot->getSortingKey().column_names.begin(), metadata_snapshot->getSortingKey().column_names.end())}
+    , primary_key_names_positions(fillNamesPositions(metadata_snapshot->getPrimaryKey().column_names))
     , log{log_}
     , column_sizes{std::move(column_sizes_)}
 {
@@ -234,6 +262,9 @@ void MergeTreeWhereOptimizer::analyzeImpl(Conditions & res, const RPNBuilderTree
         if (cond.viable)
             cond.good = isConditionGood(node, table_columns);
 
+        /// Find min position in PK of any column that is used in this condition.
+        cond.min_position_in_primary_key = findMinPosition(cond.table_columns, primary_key_names_positions);
+
         res.emplace_back(std::move(cond));
     }
 }
diff --git a/src/Storages/MergeTree/MergeTreeWhereOptimizer.h b/src/Storages/MergeTree/MergeTreeWhereOptimizer.h
index 18555a72db1..8ab21471aeb 100644
--- a/src/Storages/MergeTree/MergeTreeWhereOptimizer.h
+++ b/src/Storages/MergeTree/MergeTreeWhereOptimizer.h
@@ -72,9 +72,14 @@ private:
         /// Does the condition presumably have good selectivity?
         bool good = false;
 
+        /// Does the condition contain primary key column?
+        /// If so, it is better to move it further to the end of PREWHERE chain depending on minimal position in PK of any
+        /// column in this condition because this condition have bigger chances to be already satisfied by PK analysis.
+        Int64 min_position_in_primary_key = std::numeric_limits<Int64>::max() - 1;
+
         auto tuple() const
         {
-            return std::make_tuple(!viable, !good, columns_size, table_columns.size());
+            return std::make_tuple(!viable, !good, -min_position_in_primary_key, columns_size, table_columns.size());
         }
 
         /// Is condition a better candidate for moving to PREWHERE?
@@ -141,6 +146,7 @@ private:
     const Names queried_columns;
     const std::optional<NameSet> supported_columns;
     const NameSet sorting_key_names;
+    const NameToIndexMap primary_key_names_positions;
     Poco::Logger * log;
     std::unordered_map<std::string, UInt64> column_sizes;
     UInt64 total_size_of_queried_columns = 0;

From fe7cff5c1cfee89d411ec79e7a3e7603ff831782 Mon Sep 17 00:00:00 2001
From: Alexander Gololobov <440544+davenger@users.noreply.github.com>
Date: Wed, 19 Jul 2023 22:45:04 +0200
Subject: [PATCH 143/242] Added move_primary_key_columns_to_end_of_prewhere
 setting

---
 src/Core/Settings.h                                |  1 +
 src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp | 11 +++++++++--
 src/Storages/MergeTree/MergeTreeWhereOptimizer.h   |  1 +
 3 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/src/Core/Settings.h b/src/Core/Settings.h
index 6fb26994d2f..5a1f4b46223 100644
--- a/src/Core/Settings.h
+++ b/src/Core/Settings.h
@@ -129,6 +129,7 @@ class IColumn;
     M(Bool, optimize_move_to_prewhere_if_final, false, "If query has `FINAL`, the optimization `move_to_prewhere` is not always correct and it is enabled only if both settings `optimize_move_to_prewhere` and `optimize_move_to_prewhere_if_final` are turned on", 0) \
     M(Bool, move_all_conditions_to_prewhere, true, "Move all viable conditions from WHERE to PREWHERE", 0) \
     M(Bool, enable_multiple_prewhere_read_steps, true, "Move more conditions from WHERE to PREWHERE and do reads from disk and filtering in multiple steps if there are multiple conditions combined with AND", 0) \
+    M(Bool, move_primary_key_columns_to_end_of_prewhere, true, "Move PREWHERE conditions containing primary key columns to the end of AND chain. It is likely that these conditions are taken into account during primary key analysis and thus will not contribute a lot to PREWHERE filtering.", 0) \
     \
     M(UInt64, alter_sync, 1, "Wait for actions to manipulate the partitions. 0 - do not wait, 1 - wait for execution only of itself, 2 - wait for everyone.", 0) ALIAS(replication_alter_partitions_sync) \
     M(Int64, replication_wait_for_inactive_replica_timeout, 120, "Wait for inactive replica to execute ALTER/OPTIMIZE. Time in seconds, 0 - do not wait, negative - wait for unlimited time.", 0) \
diff --git a/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp b/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp
index 4ff58c1fc86..5efb7286685 100644
--- a/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp
+++ b/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp
@@ -88,6 +88,7 @@ void MergeTreeWhereOptimizer::optimize(SelectQueryInfo & select_query_info, cons
     where_optimizer_context.context = context;
     where_optimizer_context.array_joined_names = determineArrayJoinedNames(select);
     where_optimizer_context.move_all_conditions_to_prewhere = context->getSettingsRef().move_all_conditions_to_prewhere;
+    where_optimizer_context.move_primary_key_columns_to_end_of_prewhere = context->getSettingsRef().move_primary_key_columns_to_end_of_prewhere;
     where_optimizer_context.is_final = select.final();
 
     RPNBuilderTreeContext tree_context(context, std::move(block_with_constants), {} /*prepared_sets*/);
@@ -117,6 +118,7 @@ std::optional<MergeTreeWhereOptimizer::FilterActionsOptimizeResult> MergeTreeWhe
     where_optimizer_context.context = context;
     where_optimizer_context.array_joined_names = {};
     where_optimizer_context.move_all_conditions_to_prewhere = context->getSettingsRef().move_all_conditions_to_prewhere;
+    where_optimizer_context.move_primary_key_columns_to_end_of_prewhere = context->getSettingsRef().move_primary_key_columns_to_end_of_prewhere;
     where_optimizer_context.is_final = is_final;
 
     RPNBuilderTreeContext tree_context(context);
@@ -262,8 +264,13 @@ void MergeTreeWhereOptimizer::analyzeImpl(Conditions & res, const RPNBuilderTree
         if (cond.viable)
             cond.good = isConditionGood(node, table_columns);
 
-        /// Find min position in PK of any column that is used in this condition.
-        cond.min_position_in_primary_key = findMinPosition(cond.table_columns, primary_key_names_positions);
+        if (where_optimizer_context.move_primary_key_columns_to_end_of_prewhere)
+        {
+            /// Consider all conditions good with this setting enabled.
+            cond.good = cond.viable;
+            /// Find min position in PK of any column that is used in this condition.
+            cond.min_position_in_primary_key = findMinPosition(cond.table_columns, primary_key_names_positions);
+        }
 
         res.emplace_back(std::move(cond));
     }
diff --git a/src/Storages/MergeTree/MergeTreeWhereOptimizer.h b/src/Storages/MergeTree/MergeTreeWhereOptimizer.h
index 8ab21471aeb..fb5e84b67c6 100644
--- a/src/Storages/MergeTree/MergeTreeWhereOptimizer.h
+++ b/src/Storages/MergeTree/MergeTreeWhereOptimizer.h
@@ -96,6 +96,7 @@ private:
         ContextPtr context;
         NameSet array_joined_names;
         bool move_all_conditions_to_prewhere = false;
+        bool move_primary_key_columns_to_end_of_prewhere = false;
         bool is_final = false;
     };
 

From 0c2ea94efeeb9a68448d44c1eba08ad3898ef99f Mon Sep 17 00:00:00 2001
From: Alexander Gololobov <440544+davenger@users.noreply.github.com>
Date: Fri, 21 Jul 2023 09:57:32 +0200
Subject: [PATCH 144/242] Update test

---
 .../0_stateless/02156_storage_merge_prewhere.reference       | 2 +-
 tests/queries/0_stateless/02156_storage_merge_prewhere.sql   | 5 +++--
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/tests/queries/0_stateless/02156_storage_merge_prewhere.reference b/tests/queries/0_stateless/02156_storage_merge_prewhere.reference
index 30f9b1ab175..74ba452d783 100644
--- a/tests/queries/0_stateless/02156_storage_merge_prewhere.reference
+++ b/tests/queries/0_stateless/02156_storage_merge_prewhere.reference
@@ -1,6 +1,6 @@
 SELECT count()
 FROM t_02156_merge1
-PREWHERE (k = 3) AND notEmpty(v)
+PREWHERE notEmpty(v) AND (k = 3)
 2
 SELECT count()
 FROM t_02156_merge2
diff --git a/tests/queries/0_stateless/02156_storage_merge_prewhere.sql b/tests/queries/0_stateless/02156_storage_merge_prewhere.sql
index b75d3fa22e5..83d88a68d9b 100644
--- a/tests/queries/0_stateless/02156_storage_merge_prewhere.sql
+++ b/tests/queries/0_stateless/02156_storage_merge_prewhere.sql
@@ -1,4 +1,5 @@
 SET optimize_move_to_prewhere = 1;
+SET enable_multiple_prewhere_read_steps = 1;
 
 DROP TABLE IF EXISTS t_02156_mt1;
 DROP TABLE IF EXISTS t_02156_mt2;
@@ -8,8 +9,8 @@ DROP TABLE IF EXISTS t_02156_merge1;
 DROP TABLE IF EXISTS t_02156_merge2;
 DROP TABLE IF EXISTS t_02156_merge3;
 
-CREATE TABLE t_02156_mt1 (k UInt32, v String) ENGINE = MergeTree ORDER BY k;
-CREATE TABLE t_02156_mt2 (k UInt32, v String) ENGINE = MergeTree ORDER BY k;
+CREATE TABLE t_02156_mt1 (k UInt32, v String) ENGINE = MergeTree ORDER BY k SETTINGS min_bytes_for_wide_part=0;
+CREATE TABLE t_02156_mt2 (k UInt32, v String) ENGINE = MergeTree ORDER BY k SETTINGS min_bytes_for_wide_part=0;
 CREATE TABLE t_02156_log (k UInt32, v String) ENGINE = Log;
 
 CREATE TABLE t_02156_dist (k UInt32, v String) ENGINE = Distributed(test_shard_localhost, currentDatabase(), t_02156_mt1);

From d2dba496bf0c703178758b1c534c0914044d2094 Mon Sep 17 00:00:00 2001
From: StianBerger <111980234+StianBerger@users.noreply.github.com>
Date: Fri, 21 Jul 2023 10:26:01 +0200
Subject: [PATCH 145/242] Update date-time-functions.md

In the formatDateTime table, the %r entry (12-hour time) listed %H, the 24-hour field, in its equivalent. Replaced it with %h.
---
 docs/en/sql-reference/functions/date-time-functions.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md
index c6b978506a1..87d84425029 100644
--- a/docs/en/sql-reference/functions/date-time-functions.md
+++ b/docs/en/sql-reference/functions/date-time-functions.md
@@ -1449,7 +1449,7 @@ Using replacement fields, you can define a pattern for the resulting string. “
 | %n       | new-line character (‘’)                                 |            |
 | %p       | AM or PM designation                                    | PM         |
 | %Q       | Quarter (1-4)                                           | 1          |
-| %r       | 12-hour HH:MM AM/PM time, equivalent to %H:%i %p        | 10:30 PM   |
+| %r       | 12-hour HH:MM AM/PM time, equivalent to %h:%i %p        | 10:30 PM   |
 | %R       | 24-hour HH:MM time, equivalent to %H:%i                 | 22:33      |
 | %s       | second (00-59)                                          | 44         |
 | %S       | second (00-59)                                          | 44         |

From 53d77e6b1397e3621a81fc88da76aa9bac72ad75 Mon Sep 17 00:00:00 2001
From: Amos Bird <amosbird@gmail.com>
Date: Wed, 19 Jul 2023 21:28:17 +0800
Subject: [PATCH 146/242] Add back missing projection QueryAccessInfo.

---
 src/Interpreters/Context.cpp                  | 13 ++--
 src/Interpreters/Context.h                    |  1 +
 .../optimizeUseAggregateProjection.cpp        | 18 +++++-
 .../optimizeUseNormalProjection.cpp           |  9 ++-
 .../QueryPlan/ReadFromMergeTree.cpp           |  4 ++
 .../QueryPlan/ReadFromPreparedSource.cpp      |  8 ++-
 .../QueryPlan/ReadFromPreparedSource.h        |  3 +-
 ...0_query_log_with_projection_info.reference |  3 +
 .../01710_query_log_with_projection_info.sql  | 64 +++++++++++++++++++
 9 files changed, 114 insertions(+), 9 deletions(-)
 create mode 100644 tests/queries/0_stateless/01710_query_log_with_projection_info.reference
 create mode 100644 tests/queries/0_stateless/01710_query_log_with_projection_info.sql

diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp
index 9e4d1e8d1e2..434fc1adb40 100644
--- a/src/Interpreters/Context.cpp
+++ b/src/Interpreters/Context.cpp
@@ -1461,15 +1461,20 @@ void Context::addQueryAccessInfo(
 void Context::addQueryAccessInfo(const Names & partition_names)
 {
     if (isGlobalContext())
-    {
         throw Exception(ErrorCodes::LOGICAL_ERROR, "Global context cannot have query access info");
-    }
 
     std::lock_guard<std::mutex> lock(query_access_info.mutex);
     for (const auto & partition_name : partition_names)
-    {
         query_access_info.partitions.emplace(partition_name);
-    }
+}
+
+void Context::addQueryAccessInfo(const String & qualified_projection_name)
+{
+    if (isGlobalContext())
+        throw Exception(ErrorCodes::LOGICAL_ERROR, "Global context cannot have query access info");
+
+    std::lock_guard<std::mutex> lock(query_access_info.mutex);
+    query_access_info.projections.emplace(qualified_projection_name);
 }
 
 void Context::addQueryFactoriesInfo(QueryLogFactories factory_type, const String & created_object) const
diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h
index 3a8d41bf130..3ce899bfb77 100644
--- a/src/Interpreters/Context.h
+++ b/src/Interpreters/Context.h
@@ -657,6 +657,7 @@ public:
         const String & projection_name = {},
         const String & view_name = {});
     void addQueryAccessInfo(const Names & partition_names);
+    void addQueryAccessInfo(const String & qualified_projection_name);
 
 
     /// Supported factories for records in query_log
diff --git a/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp b/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp
index f183bdca7a9..8c85435138c 100644
--- a/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp
+++ b/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp
@@ -625,7 +625,14 @@ bool optimizeUseAggregateProjections(QueryPlan::Node & node, QueryPlan::Nodes &
         //           candidates.minmax_projection->block.dumpStructure());
 
         Pipe pipe(std::make_shared<SourceFromSingleChunk>(std::move(candidates.minmax_projection->block)));
-        projection_reading = std::make_unique<ReadFromPreparedSource>(std::move(pipe));
+        projection_reading = std::make_unique<ReadFromPreparedSource>(
+            std::move(pipe),
+            context,
+            query_info.is_internal ? ""
+                                   : fmt::format(
+                                       "{}.{}",
+                                       reading->getMergeTreeData().getStorageID().getFullTableName(),
+                                       backQuoteIfNeed(candidates.minmax_projection->candidate.projection->name)));
 
         has_ordinary_parts = !candidates.minmax_projection->normal_parts.empty();
         if (has_ordinary_parts)
@@ -658,7 +665,14 @@ bool optimizeUseAggregateProjections(QueryPlan::Node & node, QueryPlan::Nodes &
         {
             auto header = proj_snapshot->getSampleBlockForColumns(best_candidate->dag->getRequiredColumnsNames());
             Pipe pipe(std::make_shared<NullSource>(std::move(header)));
-            projection_reading = std::make_unique<ReadFromPreparedSource>(std::move(pipe));
+            projection_reading = std::make_unique<ReadFromPreparedSource>(
+                std::move(pipe),
+                context,
+                query_info.is_internal ? ""
+                                       : fmt::format(
+                                           "{}.{}",
+                                           reading->getMergeTreeData().getStorageID().getFullTableName(),
+                                           backQuoteIfNeed(best_candidate->projection->name)));
         }
 
         has_ordinary_parts = best_candidate->merge_tree_ordinary_select_result_ptr != nullptr;
diff --git a/src/Processors/QueryPlan/Optimizations/optimizeUseNormalProjection.cpp b/src/Processors/QueryPlan/Optimizations/optimizeUseNormalProjection.cpp
index dd7a5d449bc..71db561e1c9 100644
--- a/src/Processors/QueryPlan/Optimizations/optimizeUseNormalProjection.cpp
+++ b/src/Processors/QueryPlan/Optimizations/optimizeUseNormalProjection.cpp
@@ -183,7 +183,14 @@ bool optimizeUseNormalProjections(Stack & stack, QueryPlan::Nodes & nodes)
     if (!projection_reading)
     {
         Pipe pipe(std::make_shared<NullSource>(proj_snapshot->getSampleBlockForColumns(required_columns)));
-        projection_reading = std::make_unique<ReadFromPreparedSource>(std::move(pipe));
+        projection_reading = std::make_unique<ReadFromPreparedSource>(
+            std::move(pipe),
+            context,
+            query_info.is_internal ? ""
+                                   : fmt::format(
+                                       "{}.{}",
+                                       reading->getMergeTreeData().getStorageID().getFullTableName(),
+                                       backQuoteIfNeed(best_candidate->projection->name)));
     }
 
     bool has_ordinary_parts = best_candidate->merge_tree_ordinary_select_result_ptr != nullptr;
diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp
index 13de5d1d140..82f47cc61d5 100644
--- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp
+++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp
@@ -1761,6 +1761,10 @@ void ReadFromMergeTree::initializePipeline(QueryPipelineBuilder & pipeline, cons
                 fmt::format("{}.{}", data.getStorageID().getFullNameNotQuoted(), part.data_part->info.partition_id));
         }
         context->getQueryContext()->addQueryAccessInfo(partition_names);
+
+        if (storage_snapshot->projection)
+            context->getQueryContext()->addQueryAccessInfo(
+                fmt::format("{}.{}", data.getStorageID().getFullTableName(), backQuoteIfNeed(storage_snapshot->projection->name)));
     }
 
     ProfileEvents::increment(ProfileEvents::SelectedParts, result.selected_parts);
diff --git a/src/Processors/QueryPlan/ReadFromPreparedSource.cpp b/src/Processors/QueryPlan/ReadFromPreparedSource.cpp
index 7446203ec35..d50eec47ca8 100644
--- a/src/Processors/QueryPlan/ReadFromPreparedSource.cpp
+++ b/src/Processors/QueryPlan/ReadFromPreparedSource.cpp
@@ -1,17 +1,23 @@
+#include <Interpreters/Context.h>
 #include <Processors/QueryPlan/ReadFromPreparedSource.h>
 #include <QueryPipeline/QueryPipelineBuilder.h>
 
 namespace DB
 {
 
-ReadFromPreparedSource::ReadFromPreparedSource(Pipe pipe_)
+ReadFromPreparedSource::ReadFromPreparedSource(Pipe pipe_, ContextPtr context_, const String & qualified_projection_name_)
     : ISourceStep(DataStream{.header = pipe_.getHeader()})
     , pipe(std::move(pipe_))
+    , context(context_)
+    , qualified_projection_name(qualified_projection_name_)
 {
 }
 
 void ReadFromPreparedSource::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &)
 {
+    if (context && context->hasQueryContext() && !qualified_projection_name.empty())
+        context->getQueryContext()->addQueryAccessInfo(qualified_projection_name);
+
     for (const auto & processor : pipe.getProcessors())
         processors.emplace_back(processor);
 
diff --git a/src/Processors/QueryPlan/ReadFromPreparedSource.h b/src/Processors/QueryPlan/ReadFromPreparedSource.h
index 05e3ebd5102..5e64dcb7a4f 100644
--- a/src/Processors/QueryPlan/ReadFromPreparedSource.h
+++ b/src/Processors/QueryPlan/ReadFromPreparedSource.h
@@ -9,7 +9,7 @@ namespace DB
 class ReadFromPreparedSource : public ISourceStep
 {
 public:
-    explicit ReadFromPreparedSource(Pipe pipe_);
+    explicit ReadFromPreparedSource(Pipe pipe_, ContextPtr context_ = nullptr, const String & qualified_projection_name_ = "");
 
     String getName() const override { return "ReadFromPreparedSource"; }
 
@@ -18,6 +18,7 @@ public:
 protected:
     Pipe pipe;
     ContextPtr context;
+    String qualified_projection_name;
 };
 
 class ReadFromStorageStep : public ReadFromPreparedSource
diff --git a/tests/queries/0_stateless/01710_query_log_with_projection_info.reference b/tests/queries/0_stateless/01710_query_log_with_projection_info.reference
new file mode 100644
index 00000000000..9c2e9df6662
--- /dev/null
+++ b/tests/queries/0_stateless/01710_query_log_with_projection_info.reference
@@ -0,0 +1,3 @@
+t.t_normal
+t.t_agg
+t._minmax_count_projection
diff --git a/tests/queries/0_stateless/01710_query_log_with_projection_info.sql b/tests/queries/0_stateless/01710_query_log_with_projection_info.sql
new file mode 100644
index 00000000000..25e7e8fed60
--- /dev/null
+++ b/tests/queries/0_stateless/01710_query_log_with_projection_info.sql
@@ -0,0 +1,64 @@
+set log_queries=1;
+set log_queries_min_type='QUERY_FINISH';
+set optimize_use_implicit_projections=1;
+
+DROP TABLE IF EXISTS t;
+
+CREATE TABLE t
+(
+    `id` UInt64,
+    `id2` UInt64,
+    `id3` UInt64,
+    PROJECTION t_normal
+    (
+        SELECT
+            id,
+            id2,
+            id3
+        ORDER BY
+            id2,
+            id,
+            id3
+    ),
+    PROJECTION t_agg
+    (
+        SELECT
+            sum(id3)
+        GROUP BY id2
+    )
+)
+ENGINE = MergeTree
+ORDER BY id
+SETTINGS index_granularity = 8;
+
+insert into t SELECT number, -number, number FROM numbers(10000);
+
+SELECT * FROM t WHERE id2 = 3 FORMAT Null;
+SELECT sum(id3) FROM t GROUP BY id2 FORMAT Null;
+SELECT min(id) FROM t FORMAT Null;
+
+SYSTEM FLUSH LOGS;
+
+SELECT
+    --Remove the prefix string which is a mutable database name.
+    arrayStringConcat(arrayPopFront(splitByString('.', projections[1])), '.')
+FROM
+    system.query_log
+WHERE
+    current_database=currentDatabase() and query = 'SELECT * FROM t WHERE id2 = 3 FORMAT Null;';
+
+SELECT
+    --Remove the prefix string which is a mutable database name.
+    arrayStringConcat(arrayPopFront(splitByString('.', projections[1])), '.')
+FROM
+    system.query_log
+WHERE
+    current_database=currentDatabase() and query = 'SELECT sum(id3) FROM t GROUP BY id2 FORMAT Null;';
+
+SELECT
+    --Remove the prefix string which is a mutable database name.
+    arrayStringConcat(arrayPopFront(splitByString('.', projections[1])), '.')
+FROM
+    system.query_log
+WHERE
+    current_database=currentDatabase() and query = 'SELECT min(id) FROM t FORMAT Null;';

From 8187118232371630fb10ee4062b8a52285003fa0 Mon Sep 17 00:00:00 2001
From: Amos Bird <amosbird@gmail.com>
Date: Thu, 20 Jul 2023 11:12:22 +0800
Subject: [PATCH 147/242] Better code

---
 src/Interpreters/Context.cpp                   |  8 ++++++--
 src/Interpreters/Context.h                     |  9 ++++++++-
 .../optimizeUseAggregateProjection.cpp         | 18 ++++++++----------
 .../optimizeUseNormalProjection.cpp            |  9 ++++-----
 src/Processors/QueryPlan/ReadFromMergeTree.cpp |  2 +-
 .../QueryPlan/ReadFromPreparedSource.cpp       |  9 ++++-----
 .../QueryPlan/ReadFromPreparedSource.h         |  7 +++++--
 7 files changed, 36 insertions(+), 26 deletions(-)

diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp
index 434fc1adb40..cc1277e08b9 100644
--- a/src/Interpreters/Context.cpp
+++ b/src/Interpreters/Context.cpp
@@ -1468,13 +1468,17 @@ void Context::addQueryAccessInfo(const Names & partition_names)
         query_access_info.partitions.emplace(partition_name);
 }
 
-void Context::addQueryAccessInfo(const String & qualified_projection_name)
+void Context::addQueryAccessInfo(const QualifiedProjectionName & qualified_projection_name)
 {
+    if (!qualified_projection_name)
+        return;
+
     if (isGlobalContext())
         throw Exception(ErrorCodes::LOGICAL_ERROR, "Global context cannot have query access info");
 
     std::lock_guard<std::mutex> lock(query_access_info.mutex);
-    query_access_info.projections.emplace(qualified_projection_name);
+    query_access_info.projections.emplace(fmt::format(
+        "{}.{}", qualified_projection_name.storage_id.getFullTableName(), backQuoteIfNeed(qualified_projection_name.projection_name)));
 }
 
 void Context::addQueryFactoriesInfo(QueryLogFactories factory_type, const String & created_object) const
diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h
index 3ce899bfb77..fa210f04451 100644
--- a/src/Interpreters/Context.h
+++ b/src/Interpreters/Context.h
@@ -657,7 +657,14 @@ public:
         const String & projection_name = {},
         const String & view_name = {});
     void addQueryAccessInfo(const Names & partition_names);
-    void addQueryAccessInfo(const String & qualified_projection_name);
+
+    struct QualifiedProjectionName
+    {
+        StorageID storage_id = StorageID::createEmpty();
+        String projection_name;
+        explicit operator bool() const { return !projection_name.empty(); }
+    };
+    void addQueryAccessInfo(const QualifiedProjectionName & qualified_projection_name);
 
 
     /// Supported factories for records in query_log
diff --git a/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp b/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp
index 8c85435138c..fa6a7f5b8ea 100644
--- a/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp
+++ b/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp
@@ -628,11 +628,10 @@ bool optimizeUseAggregateProjections(QueryPlan::Node & node, QueryPlan::Nodes &
         projection_reading = std::make_unique<ReadFromPreparedSource>(
             std::move(pipe),
             context,
-            query_info.is_internal ? ""
-                                   : fmt::format(
-                                       "{}.{}",
-                                       reading->getMergeTreeData().getStorageID().getFullTableName(),
-                                       backQuoteIfNeed(candidates.minmax_projection->candidate.projection->name)));
+            query_info.is_internal ? Context::QualifiedProjectionName{}
+                                   : Context::QualifiedProjectionName{
+                                       .storage_id = reading->getMergeTreeData().getStorageID(),
+                                       .projection_name = candidates.minmax_projection->candidate.projection->name});
 
         has_ordinary_parts = !candidates.minmax_projection->normal_parts.empty();
         if (has_ordinary_parts)
@@ -668,11 +667,10 @@ bool optimizeUseAggregateProjections(QueryPlan::Node & node, QueryPlan::Nodes &
             projection_reading = std::make_unique<ReadFromPreparedSource>(
                 std::move(pipe),
                 context,
-                query_info.is_internal ? ""
-                                       : fmt::format(
-                                           "{}.{}",
-                                           reading->getMergeTreeData().getStorageID().getFullTableName(),
-                                           backQuoteIfNeed(best_candidate->projection->name)));
+                query_info.is_internal
+                    ? Context::QualifiedProjectionName{}
+                    : Context::QualifiedProjectionName{
+                        .storage_id = reading->getMergeTreeData().getStorageID(), .projection_name = best_candidate->projection->name});
         }
 
         has_ordinary_parts = best_candidate->merge_tree_ordinary_select_result_ptr != nullptr;
diff --git a/src/Processors/QueryPlan/Optimizations/optimizeUseNormalProjection.cpp b/src/Processors/QueryPlan/Optimizations/optimizeUseNormalProjection.cpp
index 71db561e1c9..93d1be20e81 100644
--- a/src/Processors/QueryPlan/Optimizations/optimizeUseNormalProjection.cpp
+++ b/src/Processors/QueryPlan/Optimizations/optimizeUseNormalProjection.cpp
@@ -186,11 +186,10 @@ bool optimizeUseNormalProjections(Stack & stack, QueryPlan::Nodes & nodes)
         projection_reading = std::make_unique<ReadFromPreparedSource>(
             std::move(pipe),
             context,
-            query_info.is_internal ? ""
-                                   : fmt::format(
-                                       "{}.{}",
-                                       reading->getMergeTreeData().getStorageID().getFullTableName(),
-                                       backQuoteIfNeed(best_candidate->projection->name)));
+            query_info.is_internal
+                ? Context::QualifiedProjectionName{}
+                : Context::QualifiedProjectionName{
+                    .storage_id = reading->getMergeTreeData().getStorageID(), .projection_name = best_candidate->projection->name});
     }
 
     bool has_ordinary_parts = best_candidate->merge_tree_ordinary_select_result_ptr != nullptr;
diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp
index 82f47cc61d5..2d2412f7e36 100644
--- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp
+++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp
@@ -1764,7 +1764,7 @@ void ReadFromMergeTree::initializePipeline(QueryPipelineBuilder & pipeline, cons
 
         if (storage_snapshot->projection)
             context->getQueryContext()->addQueryAccessInfo(
-                fmt::format("{}.{}", data.getStorageID().getFullTableName(), backQuoteIfNeed(storage_snapshot->projection->name)));
+                Context::QualifiedProjectionName{.storage_id = data.getStorageID(), .projection_name = storage_snapshot->projection->name});
     }
 
     ProfileEvents::increment(ProfileEvents::SelectedParts, result.selected_parts);
diff --git a/src/Processors/QueryPlan/ReadFromPreparedSource.cpp b/src/Processors/QueryPlan/ReadFromPreparedSource.cpp
index d50eec47ca8..a24c4dbe4d0 100644
--- a/src/Processors/QueryPlan/ReadFromPreparedSource.cpp
+++ b/src/Processors/QueryPlan/ReadFromPreparedSource.cpp
@@ -1,21 +1,20 @@
-#include <Interpreters/Context.h>
 #include <Processors/QueryPlan/ReadFromPreparedSource.h>
 #include <QueryPipeline/QueryPipelineBuilder.h>
 
 namespace DB
 {
 
-ReadFromPreparedSource::ReadFromPreparedSource(Pipe pipe_, ContextPtr context_, const String & qualified_projection_name_)
+ReadFromPreparedSource::ReadFromPreparedSource(Pipe pipe_, ContextPtr context_, Context::QualifiedProjectionName qualified_projection_name_)
     : ISourceStep(DataStream{.header = pipe_.getHeader()})
     , pipe(std::move(pipe_))
-    , context(context_)
-    , qualified_projection_name(qualified_projection_name_)
+    , context(std::move(context_))
+    , qualified_projection_name(std::move(qualified_projection_name_))
 {
 }
 
 void ReadFromPreparedSource::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &)
 {
-    if (context && context->hasQueryContext() && !qualified_projection_name.empty())
+    if (context && context->hasQueryContext())
         context->getQueryContext()->addQueryAccessInfo(qualified_projection_name);
 
     for (const auto & processor : pipe.getProcessors())
diff --git a/src/Processors/QueryPlan/ReadFromPreparedSource.h b/src/Processors/QueryPlan/ReadFromPreparedSource.h
index 5e64dcb7a4f..2606f501009 100644
--- a/src/Processors/QueryPlan/ReadFromPreparedSource.h
+++ b/src/Processors/QueryPlan/ReadFromPreparedSource.h
@@ -1,4 +1,6 @@
 #pragma once
+
+#include <Interpreters/Context.h>
 #include <Processors/QueryPlan/ISourceStep.h>
 #include <QueryPipeline/Pipe.h>
 
@@ -9,7 +11,8 @@ namespace DB
 class ReadFromPreparedSource : public ISourceStep
 {
 public:
-    explicit ReadFromPreparedSource(Pipe pipe_, ContextPtr context_ = nullptr, const String & qualified_projection_name_ = "");
+    explicit ReadFromPreparedSource(
+        Pipe pipe_, ContextPtr context_ = nullptr, Context::QualifiedProjectionName qualified_projection_name_ = {});
 
     String getName() const override { return "ReadFromPreparedSource"; }
 
@@ -18,7 +21,7 @@ public:
 protected:
     Pipe pipe;
     ContextPtr context;
-    String qualified_projection_name;
+    Context::QualifiedProjectionName qualified_projection_name;
 };
 
 class ReadFromStorageStep : public ReadFromPreparedSource

From 2cad81731be0443b50e66e43fb68b2b064d67a77 Mon Sep 17 00:00:00 2001
From: Amos Bird <amosbird@gmail.com>
Date: Fri, 21 Jul 2023 16:46:56 +0800
Subject: [PATCH 148/242] Try to fix style issues

---
 .../optimizeUseAggregateProjection.cpp        | 19 ++++++++++++-------
 .../optimizeUseNormalProjection.cpp           |  7 +++++--
 2 files changed, 17 insertions(+), 9 deletions(-)

diff --git a/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp b/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp
index fa6a7f5b8ea..53f47bcdf95 100644
--- a/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp
+++ b/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp
@@ -628,11 +628,13 @@ bool optimizeUseAggregateProjections(QueryPlan::Node & node, QueryPlan::Nodes &
         projection_reading = std::make_unique<ReadFromPreparedSource>(
             std::move(pipe),
             context,
-            query_info.is_internal ? Context::QualifiedProjectionName{}
-                                   : Context::QualifiedProjectionName{
-                                       .storage_id = reading->getMergeTreeData().getStorageID(),
-                                       .projection_name = candidates.minmax_projection->candidate.projection->name});
-
+            query_info.is_internal
+                ? Context::QualifiedProjectionName{}
+                : Context::QualifiedProjectionName
+                  {
+                      .storage_id = reading->getMergeTreeData().getStorageID(),
+                      .projection_name = candidates.minmax_projection->candidate.projection->name,
+                  });
         has_ordinary_parts = !candidates.minmax_projection->normal_parts.empty();
         if (has_ordinary_parts)
             reading->resetParts(std::move(candidates.minmax_projection->normal_parts));
@@ -669,8 +671,11 @@ bool optimizeUseAggregateProjections(QueryPlan::Node & node, QueryPlan::Nodes &
                 context,
                 query_info.is_internal
                     ? Context::QualifiedProjectionName{}
-                    : Context::QualifiedProjectionName{
-                        .storage_id = reading->getMergeTreeData().getStorageID(), .projection_name = best_candidate->projection->name});
+                    : Context::QualifiedProjectionName
+                      {
+                          .storage_id = reading->getMergeTreeData().getStorageID(),
+                          .projection_name = best_candidate->projection->name,
+                      });
         }
 
         has_ordinary_parts = best_candidate->merge_tree_ordinary_select_result_ptr != nullptr;
diff --git a/src/Processors/QueryPlan/Optimizations/optimizeUseNormalProjection.cpp b/src/Processors/QueryPlan/Optimizations/optimizeUseNormalProjection.cpp
index 93d1be20e81..f6ace6f8025 100644
--- a/src/Processors/QueryPlan/Optimizations/optimizeUseNormalProjection.cpp
+++ b/src/Processors/QueryPlan/Optimizations/optimizeUseNormalProjection.cpp
@@ -188,8 +188,11 @@ bool optimizeUseNormalProjections(Stack & stack, QueryPlan::Nodes & nodes)
             context,
             query_info.is_internal
                 ? Context::QualifiedProjectionName{}
-                : Context::QualifiedProjectionName{
-                    .storage_id = reading->getMergeTreeData().getStorageID(), .projection_name = best_candidate->projection->name});
+                : Context::QualifiedProjectionName
+                  {
+                      .storage_id = reading->getMergeTreeData().getStorageID(),
+                      .projection_name = best_candidate->projection->name,
+                  });
     }
 
     bool has_ordinary_parts = best_candidate->merge_tree_ordinary_select_result_ptr != nullptr;

From 5fa45bdbeaef99ba6a7db894d89dc749b7ac3f97 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Krzysztof=20G=C3=B3ralski?=
 <krzysztof.goralski@clickhouse.com>
Date: Fri, 21 Jul 2023 12:12:34 +0200
Subject: [PATCH 149/242] Setting the metric FilesystemCacheSizeLimit in 
 LRUFileCachePriority.h

---
 src/Interpreters/Cache/LRUFileCachePriority.h | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/src/Interpreters/Cache/LRUFileCachePriority.h b/src/Interpreters/Cache/LRUFileCachePriority.h
index e0d7d45062a..662a76968bc 100644
--- a/src/Interpreters/Cache/LRUFileCachePriority.h
+++ b/src/Interpreters/Cache/LRUFileCachePriority.h
@@ -5,6 +5,12 @@
 #include <Interpreters/Cache/FileCacheKey.h>
 #include <Common/logger_useful.h>
 
+namespace CurrentMetrics
+{
+    extern const Metric FilesystemCacheSizeLimit;
+}
+
+
 namespace DB
 {
 
@@ -18,7 +24,9 @@ private:
     using LRUQueueIterator = typename LRUQueue::iterator;
 
 public:
-    LRUFileCachePriority(size_t max_size_, size_t max_elements_) : IFileCachePriority(max_size_, max_elements_) {}
+    LRUFileCachePriority(size_t max_size_, size_t max_elements_) : IFileCachePriority(max_size_, max_elements_) {
+        CurrentMetrics::set(CurrentMetrics::FilesystemCacheSizeLimit, max_size_);
+    }
 
     size_t getSize(const CacheGuard::Lock &) const override { return current_size; }
 

From 930d45303c5b96b7553d611e82e0c94215ef5705 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Krzysztof=20G=C3=B3ralski?=
 <krzysztof.goralski@clickhouse.com>
Date: Fri, 21 Jul 2023 12:13:38 +0200
Subject: [PATCH 150/242] removing the metric set from LRUFileCachePriority.cpp

---
 src/Interpreters/Cache/LRUFileCachePriority.cpp | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/Interpreters/Cache/LRUFileCachePriority.cpp b/src/Interpreters/Cache/LRUFileCachePriority.cpp
index 33e567b7a76..18862e154da 100644
--- a/src/Interpreters/Cache/LRUFileCachePriority.cpp
+++ b/src/Interpreters/Cache/LRUFileCachePriority.cpp
@@ -7,7 +7,6 @@
 namespace CurrentMetrics
 {
     extern const Metric FilesystemCacheSize;
-    extern const Metric FilesystemCacheSizeLimit;
     extern const Metric FilesystemCacheElements;
 }
 
@@ -102,7 +101,6 @@ void LRUFileCachePriority::updateSize(int64_t size)
 {
     current_size += size;
     CurrentMetrics::add(CurrentMetrics::FilesystemCacheSize, size);
-    CurrentMetrics::set(CurrentMetrics::FilesystemCacheSizeLimit, getSizeLimit());
 }
 
 void LRUFileCachePriority::updateElementsCount(int64_t num)

From 3412dd225919f3850dfb4c0f8647e74e6630e31f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Krzysztof=20G=C3=B3ralski?=
 <krzysztof.goralski@clickhouse.com>
Date: Fri, 21 Jul 2023 12:14:30 +0200
Subject: [PATCH 151/242] removed unnecessary whitespace

---
 src/Interpreters/Cache/LRUFileCachePriority.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/Interpreters/Cache/LRUFileCachePriority.h b/src/Interpreters/Cache/LRUFileCachePriority.h
index 662a76968bc..9396070b792 100644
--- a/src/Interpreters/Cache/LRUFileCachePriority.h
+++ b/src/Interpreters/Cache/LRUFileCachePriority.h
@@ -10,7 +10,6 @@ namespace CurrentMetrics
     extern const Metric FilesystemCacheSizeLimit;
 }
 
-
 namespace DB
 {
 

From e638a9ecd3cebe83c0c3997b19e0e73d1fb14639 Mon Sep 17 00:00:00 2001
From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com>
Date: Fri, 21 Jul 2023 12:24:36 +0200
Subject: [PATCH 152/242] Fix style check

---
 src/Interpreters/Cache/LRUFileCachePriority.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/Interpreters/Cache/LRUFileCachePriority.h b/src/Interpreters/Cache/LRUFileCachePriority.h
index 9396070b792..e041e59a91a 100644
--- a/src/Interpreters/Cache/LRUFileCachePriority.h
+++ b/src/Interpreters/Cache/LRUFileCachePriority.h
@@ -23,7 +23,8 @@ private:
     using LRUQueueIterator = typename LRUQueue::iterator;
 
 public:
-    LRUFileCachePriority(size_t max_size_, size_t max_elements_) : IFileCachePriority(max_size_, max_elements_) {
+    LRUFileCachePriority(size_t max_size_, size_t max_elements_) : IFileCachePriority(max_size_, max_elements_)
+    {
         CurrentMetrics::set(CurrentMetrics::FilesystemCacheSizeLimit, max_size_);
     }
 

From 3815a6aa62414fcd4e51090450a08bc368fd8c86 Mon Sep 17 00:00:00 2001
From: Antonio Andelic <antonio@clickhouse.com>
Date: Fri, 21 Jul 2023 11:14:39 +0000
Subject: [PATCH 153/242] Make test_keeper_force_recovery more stable

---
 .../configs/enable_keeper1.xml                |  2 ++
 .../configs/enable_keeper2.xml                |  2 ++
 .../configs/enable_keeper3.xml                |  2 ++
 .../configs/enable_keeper4.xml                |  2 ++
 .../configs/enable_keeper5.xml                |  2 ++
 .../configs/enable_keeper6.xml                |  2 ++
 .../configs/enable_keeper7.xml                |  2 ++
 .../configs/enable_keeper8.xml                |  2 ++
 .../configs/recovered_keeper1.xml             |  2 ++
 .../configs/use_keeper.xml                    | 36 -------------------
 .../test_keeper_force_recovery/test.py        |  5 +--
 .../configs/enable_keeper1.xml                |  2 ++
 .../configs/enable_keeper1_solo.xml           |  2 ++
 .../configs/enable_keeper2.xml                |  2 ++
 .../configs/enable_keeper3.xml                |  2 ++
 .../configs/use_keeper.xml                    | 16 ---------
 .../test.py                                   |  5 +--
 17 files changed, 28 insertions(+), 60 deletions(-)
 delete mode 100644 tests/integration/test_keeper_force_recovery/configs/use_keeper.xml
 delete mode 100644 tests/integration/test_keeper_force_recovery_single_node/configs/use_keeper.xml

diff --git a/tests/integration/test_keeper_force_recovery/configs/enable_keeper1.xml b/tests/integration/test_keeper_force_recovery/configs/enable_keeper1.xml
index b7f9d1b058e..62109ee9092 100644
--- a/tests/integration/test_keeper_force_recovery/configs/enable_keeper1.xml
+++ b/tests/integration/test_keeper_force_recovery/configs/enable_keeper1.xml
@@ -1,5 +1,6 @@
 <clickhouse>
     <keeper_server>
+        <use_cluster>false</use_cluster>
         <tcp_port>9181</tcp_port>
         <server_id>1</server_id>
         <log_storage_path>/var/lib/clickhouse/coordination/log</log_storage_path>
@@ -10,6 +11,7 @@
             <session_timeout_ms>10000</session_timeout_ms>
             <snapshot_distance>75</snapshot_distance>
             <raft_logs_level>trace</raft_logs_level>
+            <max_requests_batch_size>200</max_requests_batch_size>
         </coordination_settings>
 
         <raft_configuration>
diff --git a/tests/integration/test_keeper_force_recovery/configs/enable_keeper2.xml b/tests/integration/test_keeper_force_recovery/configs/enable_keeper2.xml
index b773d59f259..2696c573180 100644
--- a/tests/integration/test_keeper_force_recovery/configs/enable_keeper2.xml
+++ b/tests/integration/test_keeper_force_recovery/configs/enable_keeper2.xml
@@ -1,5 +1,6 @@
 <clickhouse>
     <keeper_server>
+        <use_cluster>false</use_cluster>
         <tcp_port>9181</tcp_port>
         <server_id>2</server_id>
         <log_storage_path>/var/lib/clickhouse/coordination/log</log_storage_path>
@@ -10,6 +11,7 @@
             <session_timeout_ms>10000</session_timeout_ms>
             <snapshot_distance>75</snapshot_distance>
             <raft_logs_level>trace</raft_logs_level>
+            <max_requests_batch_size>200</max_requests_batch_size>
         </coordination_settings>
 
         <raft_configuration>
diff --git a/tests/integration/test_keeper_force_recovery/configs/enable_keeper3.xml b/tests/integration/test_keeper_force_recovery/configs/enable_keeper3.xml
index d4c2befc10f..fc0c0fd0300 100644
--- a/tests/integration/test_keeper_force_recovery/configs/enable_keeper3.xml
+++ b/tests/integration/test_keeper_force_recovery/configs/enable_keeper3.xml
@@ -1,5 +1,6 @@
 <clickhouse>
     <keeper_server>
+        <use_cluster>false</use_cluster>
         <tcp_port>9181</tcp_port>
         <server_id>3</server_id>
         <log_storage_path>/var/lib/clickhouse/coordination/log</log_storage_path>
@@ -10,6 +11,7 @@
             <session_timeout_ms>10000</session_timeout_ms>
             <snapshot_distance>75</snapshot_distance>
             <raft_logs_level>trace</raft_logs_level>
+            <max_requests_batch_size>200</max_requests_batch_size>
         </coordination_settings>
 
         <raft_configuration>
diff --git a/tests/integration/test_keeper_force_recovery/configs/enable_keeper4.xml b/tests/integration/test_keeper_force_recovery/configs/enable_keeper4.xml
index c039e709c9e..06f1c1d7195 100644
--- a/tests/integration/test_keeper_force_recovery/configs/enable_keeper4.xml
+++ b/tests/integration/test_keeper_force_recovery/configs/enable_keeper4.xml
@@ -1,5 +1,6 @@
 <clickhouse>
     <keeper_server>
+        <use_cluster>false</use_cluster>
         <tcp_port>9181</tcp_port>
         <server_id>4</server_id>
         <log_storage_path>/var/lib/clickhouse/coordination/log</log_storage_path>
@@ -10,6 +11,7 @@
             <session_timeout_ms>10000</session_timeout_ms>
             <snapshot_distance>75</snapshot_distance>
             <raft_logs_level>trace</raft_logs_level>
+            <max_requests_batch_size>200</max_requests_batch_size>
         </coordination_settings>
 
         <raft_configuration>
diff --git a/tests/integration/test_keeper_force_recovery/configs/enable_keeper5.xml b/tests/integration/test_keeper_force_recovery/configs/enable_keeper5.xml
index fb43b6524c8..5d3767ae969 100644
--- a/tests/integration/test_keeper_force_recovery/configs/enable_keeper5.xml
+++ b/tests/integration/test_keeper_force_recovery/configs/enable_keeper5.xml
@@ -1,5 +1,6 @@
 <clickhouse>
     <keeper_server>
+        <use_cluster>false</use_cluster>
         <tcp_port>9181</tcp_port>
         <server_id>5</server_id>
         <log_storage_path>/var/lib/clickhouse/coordination/log</log_storage_path>
@@ -10,6 +11,7 @@
             <session_timeout_ms>10000</session_timeout_ms>
             <snapshot_distance>75</snapshot_distance>
             <raft_logs_level>trace</raft_logs_level>
+            <max_requests_batch_size>200</max_requests_batch_size>
         </coordination_settings>
 
         <raft_configuration>
diff --git a/tests/integration/test_keeper_force_recovery/configs/enable_keeper6.xml b/tests/integration/test_keeper_force_recovery/configs/enable_keeper6.xml
index 430e662bf36..4d30822741a 100644
--- a/tests/integration/test_keeper_force_recovery/configs/enable_keeper6.xml
+++ b/tests/integration/test_keeper_force_recovery/configs/enable_keeper6.xml
@@ -1,5 +1,6 @@
 <clickhouse>
     <keeper_server>
+        <use_cluster>false</use_cluster>
         <tcp_port>9181</tcp_port>
         <server_id>6</server_id>
         <log_storage_path>/var/lib/clickhouse/coordination/log</log_storage_path>
@@ -10,6 +11,7 @@
             <session_timeout_ms>10000</session_timeout_ms>
             <snapshot_distance>75</snapshot_distance>
             <raft_logs_level>trace</raft_logs_level>
+            <max_requests_batch_size>200</max_requests_batch_size>
         </coordination_settings>
 
         <raft_configuration>
diff --git a/tests/integration/test_keeper_force_recovery/configs/enable_keeper7.xml b/tests/integration/test_keeper_force_recovery/configs/enable_keeper7.xml
index aa10774ad7d..b59141042ea 100644
--- a/tests/integration/test_keeper_force_recovery/configs/enable_keeper7.xml
+++ b/tests/integration/test_keeper_force_recovery/configs/enable_keeper7.xml
@@ -1,5 +1,6 @@
 <clickhouse>
     <keeper_server>
+        <use_cluster>false</use_cluster>
         <tcp_port>9181</tcp_port>
         <server_id>7</server_id>
         <log_storage_path>/var/lib/clickhouse/coordination/log</log_storage_path>
@@ -10,6 +11,7 @@
             <session_timeout_ms>10000</session_timeout_ms>
             <snapshot_distance>75</snapshot_distance>
             <raft_logs_level>trace</raft_logs_level>
+            <max_requests_batch_size>200</max_requests_batch_size>
         </coordination_settings>
 
         <raft_configuration>
diff --git a/tests/integration/test_keeper_force_recovery/configs/enable_keeper8.xml b/tests/integration/test_keeper_force_recovery/configs/enable_keeper8.xml
index 4f1c21a1084..711d70cb1ac 100644
--- a/tests/integration/test_keeper_force_recovery/configs/enable_keeper8.xml
+++ b/tests/integration/test_keeper_force_recovery/configs/enable_keeper8.xml
@@ -1,5 +1,6 @@
 <clickhouse>
     <keeper_server>
+        <use_cluster>false</use_cluster>
         <tcp_port>9181</tcp_port>
         <server_id>8</server_id>
         <log_storage_path>/var/lib/clickhouse/coordination/log</log_storage_path>
@@ -10,6 +11,7 @@
             <session_timeout_ms>10000</session_timeout_ms>
             <snapshot_distance>75</snapshot_distance>
             <raft_logs_level>trace</raft_logs_level>
+            <max_requests_batch_size>200</max_requests_batch_size>
         </coordination_settings>
 
         <raft_configuration>
diff --git a/tests/integration/test_keeper_force_recovery/configs/recovered_keeper1.xml b/tests/integration/test_keeper_force_recovery/configs/recovered_keeper1.xml
index eaf0f01afc9..abd4ef85bf7 100644
--- a/tests/integration/test_keeper_force_recovery/configs/recovered_keeper1.xml
+++ b/tests/integration/test_keeper_force_recovery/configs/recovered_keeper1.xml
@@ -1,5 +1,6 @@
 <clickhouse>
     <keeper_server>
+        <use_cluster>false</use_cluster>
         <tcp_port>9181</tcp_port>
         <server_id>1</server_id>
         <log_storage_path>/var/lib/clickhouse/coordination/log</log_storage_path>
@@ -10,6 +11,7 @@
             <session_timeout_ms>10000</session_timeout_ms>
             <snapshot_distance>75</snapshot_distance>
             <raft_logs_level>trace</raft_logs_level>
+            <max_requests_batch_size>200</max_requests_batch_size>
         </coordination_settings>
 
         <raft_configuration>
diff --git a/tests/integration/test_keeper_force_recovery/configs/use_keeper.xml b/tests/integration/test_keeper_force_recovery/configs/use_keeper.xml
deleted file mode 100644
index f41e8c6e49c..00000000000
--- a/tests/integration/test_keeper_force_recovery/configs/use_keeper.xml
+++ /dev/null
@@ -1,36 +0,0 @@
-<clickhouse>
-    <zookeeper>
-        <node index="1">
-            <host>node1</host>
-            <port>9181</port>
-        </node>
-        <node index="2">
-            <host>node2</host>
-            <port>9181</port>
-        </node>
-        <node index="3">
-            <host>node3</host>
-            <port>9181</port>
-        </node>
-        <node index="4">
-            <host>node4</host>
-            <port>9181</port>
-        </node>
-        <node index="5">
-            <host>node5</host>
-            <port>9181</port>
-        </node>
-        <node index="6">
-            <host>node6</host>
-            <port>9181</port>
-        </node>
-        <node index="7">
-            <host>node7</host>
-            <port>9181</port>
-        </node>
-        <node index="8">
-            <host>node8</host>
-            <port>9181</port>
-        </node>
-    </zookeeper>
-</clickhouse>
diff --git a/tests/integration/test_keeper_force_recovery/test.py b/tests/integration/test_keeper_force_recovery/test.py
index f7c3787b4d8..f630e5a422b 100644
--- a/tests/integration/test_keeper_force_recovery/test.py
+++ b/tests/integration/test_keeper_force_recovery/test.py
@@ -22,10 +22,7 @@ def get_nodes():
         nodes.append(
             cluster.add_instance(
                 f"node{i+1}",
-                main_configs=[
-                    f"configs/enable_keeper{i+1}.xml",
-                    f"configs/use_keeper.xml",
-                ],
+                main_configs=[f"configs/enable_keeper{i+1}.xml"],
                 stay_alive=True,
             )
         )
diff --git a/tests/integration/test_keeper_force_recovery_single_node/configs/enable_keeper1.xml b/tests/integration/test_keeper_force_recovery_single_node/configs/enable_keeper1.xml
index 441c1bc185d..94e59128bd3 100644
--- a/tests/integration/test_keeper_force_recovery_single_node/configs/enable_keeper1.xml
+++ b/tests/integration/test_keeper_force_recovery_single_node/configs/enable_keeper1.xml
@@ -1,5 +1,6 @@
 <clickhouse>
     <keeper_server>
+        <use_cluster>false</use_cluster>
         <tcp_port>9181</tcp_port>
         <server_id>1</server_id>
         <log_storage_path>/var/lib/clickhouse/coordination/log</log_storage_path>
@@ -10,6 +11,7 @@
             <session_timeout_ms>10000</session_timeout_ms>
             <snapshot_distance>75</snapshot_distance>
             <raft_logs_level>trace</raft_logs_level>
+            <max_requests_batch_size>200</max_requests_batch_size>
         </coordination_settings>
 
         <raft_configuration>
diff --git a/tests/integration/test_keeper_force_recovery_single_node/configs/enable_keeper1_solo.xml b/tests/integration/test_keeper_force_recovery_single_node/configs/enable_keeper1_solo.xml
index f0cb887b062..6367b4b4c29 100644
--- a/tests/integration/test_keeper_force_recovery_single_node/configs/enable_keeper1_solo.xml
+++ b/tests/integration/test_keeper_force_recovery_single_node/configs/enable_keeper1_solo.xml
@@ -1,5 +1,6 @@
 <clickhouse>
     <keeper_server>
+        <use_cluster>false</use_cluster>
         <force_recovery>1</force_recovery>
         <tcp_port>9181</tcp_port>
         <server_id>1</server_id>
@@ -11,6 +12,7 @@
             <session_timeout_ms>10000</session_timeout_ms>
             <snapshot_distance>75</snapshot_distance>
             <raft_logs_level>trace</raft_logs_level>
+            <max_requests_batch_size>200</max_requests_batch_size>
         </coordination_settings>
 
         <raft_configuration>
diff --git a/tests/integration/test_keeper_force_recovery_single_node/configs/enable_keeper2.xml b/tests/integration/test_keeper_force_recovery_single_node/configs/enable_keeper2.xml
index e2e2c1fd7db..548d12c2e0a 100644
--- a/tests/integration/test_keeper_force_recovery_single_node/configs/enable_keeper2.xml
+++ b/tests/integration/test_keeper_force_recovery_single_node/configs/enable_keeper2.xml
@@ -1,5 +1,6 @@
 <clickhouse>
     <keeper_server>
+        <use_cluster>false</use_cluster>
         <tcp_port>9181</tcp_port>
         <server_id>2</server_id>
         <log_storage_path>/var/lib/clickhouse/coordination/log</log_storage_path>
@@ -10,6 +11,7 @@
             <session_timeout_ms>10000</session_timeout_ms>
             <snapshot_distance>75</snapshot_distance>
             <raft_logs_level>trace</raft_logs_level>
+            <max_requests_batch_size>200</max_requests_batch_size>
         </coordination_settings>
 
         <raft_configuration>
diff --git a/tests/integration/test_keeper_force_recovery_single_node/configs/enable_keeper3.xml b/tests/integration/test_keeper_force_recovery_single_node/configs/enable_keeper3.xml
index e2ac0400d88..65f9675cbd6 100644
--- a/tests/integration/test_keeper_force_recovery_single_node/configs/enable_keeper3.xml
+++ b/tests/integration/test_keeper_force_recovery_single_node/configs/enable_keeper3.xml
@@ -1,5 +1,6 @@
 <clickhouse>
     <keeper_server>
+        <use_cluster>false</use_cluster>
         <tcp_port>9181</tcp_port>
         <server_id>3</server_id>
         <log_storage_path>/var/lib/clickhouse/coordination/log</log_storage_path>
@@ -10,6 +11,7 @@
             <session_timeout_ms>10000</session_timeout_ms>
             <snapshot_distance>75</snapshot_distance>
             <raft_logs_level>trace</raft_logs_level>
+            <max_requests_batch_size>200</max_requests_batch_size>
         </coordination_settings>
 
         <raft_configuration>
diff --git a/tests/integration/test_keeper_force_recovery_single_node/configs/use_keeper.xml b/tests/integration/test_keeper_force_recovery_single_node/configs/use_keeper.xml
deleted file mode 100644
index 384e984f210..00000000000
--- a/tests/integration/test_keeper_force_recovery_single_node/configs/use_keeper.xml
+++ /dev/null
@@ -1,16 +0,0 @@
-<clickhouse>
-    <zookeeper>
-        <node index="1">
-            <host>node1</host>
-            <port>9181</port>
-        </node>
-        <node index="2">
-            <host>node2</host>
-            <port>9181</port>
-        </node>
-        <node index="3">
-            <host>node3</host>
-            <port>9181</port>
-        </node>
-    </zookeeper>
-</clickhouse>
diff --git a/tests/integration/test_keeper_force_recovery_single_node/test.py b/tests/integration/test_keeper_force_recovery_single_node/test.py
index 1c0d5e9a306..132c5488df6 100644
--- a/tests/integration/test_keeper_force_recovery_single_node/test.py
+++ b/tests/integration/test_keeper_force_recovery_single_node/test.py
@@ -20,10 +20,7 @@ def get_nodes():
         nodes.append(
             cluster.add_instance(
                 f"node{i+1}",
-                main_configs=[
-                    f"configs/enable_keeper{i+1}.xml",
-                    f"configs/use_keeper.xml",
-                ],
+                main_configs=[f"configs/enable_keeper{i+1}.xml"],
                 stay_alive=True,
             )
         )

From 8970a531453cbfd9916681bf658ba605db0129de Mon Sep 17 00:00:00 2001
From: serxa <sergei@clickhouse.com>
Date: Fri, 21 Jul 2023 11:38:46 +0000
Subject: [PATCH 154/242] Fix `countSubstrings()` hang with empty needle and a
 column haystack

---
 src/Functions/CountSubstringsImpl.h | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/Functions/CountSubstringsImpl.h b/src/Functions/CountSubstringsImpl.h
index de00e9397d6..1501e50afcf 100644
--- a/src/Functions/CountSubstringsImpl.h
+++ b/src/Functions/CountSubstringsImpl.h
@@ -49,6 +49,9 @@ struct CountSubstringsImpl
         /// FIXME: suboptimal
         memset(&res[0], 0, res.size() * sizeof(res[0]));
 
+        if (needle.empty())
+            return; // Return all zeros
+
         /// Current index in the array of strings.
         size_t i = 0;
 

From 66c8066cd59a685ded94af69d2cc37469cb29d77 Mon Sep 17 00:00:00 2001
From: serxa <sergei@clickhouse.com>
Date: Fri, 21 Jul 2023 11:45:44 +0000
Subject: [PATCH 155/242] add test

---
 tests/queries/0_stateless/01590_countSubstrings.reference | 2 ++
 tests/queries/0_stateless/01590_countSubstrings.sql       | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/tests/queries/0_stateless/01590_countSubstrings.reference b/tests/queries/0_stateless/01590_countSubstrings.reference
index 056cb4c53b6..95031cd3856 100644
--- a/tests/queries/0_stateless/01590_countSubstrings.reference
+++ b/tests/queries/0_stateless/01590_countSubstrings.reference
@@ -7,6 +7,8 @@ empty
 0
 0
 0
+0
+0
 char
 1
 2
diff --git a/tests/queries/0_stateless/01590_countSubstrings.sql b/tests/queries/0_stateless/01590_countSubstrings.sql
index dd2122ed6ff..6d2d87b1260 100644
--- a/tests/queries/0_stateless/01590_countSubstrings.sql
+++ b/tests/queries/0_stateless/01590_countSubstrings.sql
@@ -12,6 +12,8 @@ select 'empty';
 select countSubstrings('', '.');
 select countSubstrings('', '');
 select countSubstrings('.', '');
+select countSubstrings(toString(number), '') from numbers(1);
+select countSubstrings('', toString(number)) from numbers(1);
 
 select 'char';
 select countSubstrings('foobar.com', '.');

From 0f969923229375d72faac15257fc70bd7ece9095 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Sun, 16 Jul 2023 08:07:50 +0200
Subject: [PATCH 156/242] Fix possible EADDRINUSE ("Address already in use") in
 integration tests

Here is one example [1]:

    minio1_1    | WARNING: Console endpoint is listening on a dynamic port (32911), please use --console-address ":PORT" to choose a static port.
    minio1_1    | ERROR Unable to initialize console server: Specified port is already in use
    minio1_1    |       > Please ensure no other program uses the same address/port

  [1]: https://s3.amazonaws.com/clickhouse-test-reports/52103/7d510eac7c5f0dfb3361e269be30972e6022fada/integration_tests__tsan__[1_6].html

And here is a second one [2]:

    java.net.BindException: Problem binding to [0.0.0.0:50020] java.net.BindException: Address already in use; For more details see:  http://wiki.apache.org/hadoop/BindException

  [2]: https://s3.amazonaws.com/clickhouse-test-reports/51493/63e88b725d3d255a6534adce4d434ce5f95d2874/integration_tests__asan__[1_6].html

v2: increase the limit from 5K to 10K
Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com>
---
 tests/integration/conftest.py | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py
index 5933883f7b0..968571bfdde 100644
--- a/tests/integration/conftest.py
+++ b/tests/integration/conftest.py
@@ -12,6 +12,22 @@ from helpers.network import _NetworkManager
 logging.raiseExceptions = False
 
 
+@pytest.fixture(autouse=True, scope="session")
+def tune_local_port_range():
+    # Lots of services uses non privileged ports:
+    # - hdfs -- 50020/50070/...
+    # - minio
+    # - mysql
+    # - psql
+    #
+    # So instead of tuning all these thirdparty services, let's simply
+    # prohibit using such ports for outgoing connections, this should fix
+    # possible "Address already in use" errors.
+    #
+    # NOTE: 5K is not enough, and sometimes leads to EADDRNOTAVAIL error.
+    run_and_check(["sysctl net.ipv4.ip_local_port_range='55000 65535'"], shell=True)
+
+
 @pytest.fixture(autouse=True, scope="session")
 def cleanup_environment():
     try:

From 1fe8076b9422e09b78887b44a724cf1cae5d7fd8 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Thu, 20 Jul 2023 12:38:57 +0200
Subject: [PATCH 157/242] Fix capabilities installed via systemd service (fixes
 netlink/IO priorities)

The CapabilityBoundingSet that the systemd unit already contained only
controls which capabilities are allowed to be set; it does not grant them.

To grant them you need to use AmbientCapabilities.

And if you do not use 'clickhouse install' then:
- IO priorities were unavailable (since they require CAP_SYS_NICE)
- For taskstats, procfs was used instead of netlink

Not a big deal, but still.

Here is how it was tested:

    $ systemd-run -p CapabilityBoundingSet=CAP_NET_ADMIN --shell
    root:/etc (master)# capsh --print
    Current: cap_net_admin=ep
    Bounding set =cap_net_admin
    Ambient set =

    $ systemd-run -p User=azat -p CapabilityBoundingSet=CAP_NET_ADMIN --shell
    azat:/etc$ capsh --print
    Current: =
    Bounding set =cap_net_admin
    Ambient set =

    $ systemd-run -p User=azat -p AmbientCapabilities=CAP_NET_ADMIN -p CapabilityBoundingSet=CAP_NET_ADMIN --shell
    azat:/etc$ capsh --print
    Current: cap_net_admin=eip
    Bounding set =cap_net_admin
    Ambient set =cap_net_admin

Note: if you are running it as root (without changing the user), you don't
need to specify AmbientCapabilities additionally, because root has all
capabilities by default and they are inherited.

Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com>
---
 packages/clickhouse-server.service | 1 +
 1 file changed, 1 insertion(+)

diff --git a/packages/clickhouse-server.service b/packages/clickhouse-server.service
index 7742d8b278a..42dc5bd380d 100644
--- a/packages/clickhouse-server.service
+++ b/packages/clickhouse-server.service
@@ -29,6 +29,7 @@ EnvironmentFile=-/etc/default/clickhouse
 LimitCORE=infinity
 LimitNOFILE=500000
 CapabilityBoundingSet=CAP_NET_ADMIN CAP_IPC_LOCK CAP_SYS_NICE CAP_NET_BIND_SERVICE
+AmbientCapabilities=CAP_NET_ADMIN CAP_IPC_LOCK CAP_SYS_NICE CAP_NET_BIND_SERVICE
 
 [Install]
 # ClickHouse should not start from the rescue shell (rescue.target).

From 8ec8388a9ef063beb02b430ae4b89dfe5bab9ddd Mon Sep 17 00:00:00 2001
From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com>
Date: Fri, 21 Jul 2023 14:53:02 +0200
Subject: [PATCH 158/242] Update gtest_lru_file_cache.cpp

---
 src/Interpreters/tests/gtest_lru_file_cache.cpp | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/src/Interpreters/tests/gtest_lru_file_cache.cpp b/src/Interpreters/tests/gtest_lru_file_cache.cpp
index b9d12c8ed42..12e7d9372f7 100644
--- a/src/Interpreters/tests/gtest_lru_file_cache.cpp
+++ b/src/Interpreters/tests/gtest_lru_file_cache.cpp
@@ -470,6 +470,7 @@ TEST_F(FileCacheTest, get)
 
                 auto & file_segment2 = get(holder2, 2);
                 ASSERT_TRUE(file_segment2.getOrSetDownloader() != FileSegment::getCallerId());
+                ASSERT_EQ(file_segment2.state(), State::DOWNLOADING);
 
                 {
                     std::lock_guard lock(mutex);
@@ -478,8 +479,7 @@ TEST_F(FileCacheTest, get)
                 cv.notify_one();
 
                 file_segment2.wait(file_segment2.range().right);
-                file_segment2.complete();
-                ASSERT_TRUE(file_segment2.state() == State::DOWNLOADED);
+                ASSERT_EQ(file_segment2.getDownloadedSize(false), file_segment2.range().size());
             });
 
             {
@@ -488,7 +488,8 @@ TEST_F(FileCacheTest, get)
             }
 
             download(file_segment);
-            ASSERT_TRUE(file_segment.state() == State::DOWNLOADED);
+            ASSERT_EQ(file_segment.state(), State::DOWNLOADED);
+            file_segment.completePartAndResetDownloader();
 
             other_1.join();
 

From 4695ec6802c80d25e93a7b523821840c10a3b200 Mon Sep 17 00:00:00 2001
From: Alexander Sapin <alesapin@gmail.com>
Date: Fri, 21 Jul 2023 14:56:29 +0200
Subject: [PATCH 159/242] Add an ability to specify allocations size for
 sampling memory profiler

---
 programs/server/Server.cpp                    | 21 ++++++++++-----
 src/Common/MemoryTracker.cpp                  | 11 ++++++--
 src/Common/MemoryTracker.h                    | 18 +++++++++++++
 src/Core/ServerSettings.h                     |  8 ++++--
 src/Core/Settings.h                           |  4 ++-
 src/Interpreters/ProcessList.cpp              |  3 +++
 src/Interpreters/ThreadStatusExt.cpp          |  2 ++
 .../__init__.py                               |  1 +
 .../configs/max_untracked_memory.xml          |  7 +++++
 .../configs/memory_profiler.xml               |  5 ++++
 .../test.py                                   | 27 +++++++++++++++++++
 ...r_sample_min_max_allocation_size.reference |  1 +
 ...profiler_sample_min_max_allocation_size.sh | 18 +++++++++++++
 13 files changed, 115 insertions(+), 11 deletions(-)
 create mode 100644 tests/integration/test_memory_profiler_min_max_borders/__init__.py
 create mode 100644 tests/integration/test_memory_profiler_min_max_borders/configs/max_untracked_memory.xml
 create mode 100644 tests/integration/test_memory_profiler_min_max_borders/configs/memory_profiler.xml
 create mode 100644 tests/integration/test_memory_profiler_min_max_borders/test.py
 create mode 100644 tests/queries/0_stateless/02818_memory_profiler_sample_min_max_allocation_size.reference
 create mode 100755 tests/queries/0_stateless/02818_memory_profiler_sample_min_max_allocation_size.sh

diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp
index 948824242fb..71bf8cc9e89 100644
--- a/programs/server/Server.cpp
+++ b/programs/server/Server.cpp
@@ -1643,17 +1643,26 @@ try
         global_context->initializeTraceCollector();
 
         /// Set up server-wide memory profiler (for total memory tracker).
-        UInt64 total_memory_profiler_step = config().getUInt64("total_memory_profiler_step", 0);
-        if (total_memory_profiler_step)
+        if (server_settings.total_memory_profiler_step)
         {
-            total_memory_tracker.setProfilerStep(total_memory_profiler_step);
+            total_memory_tracker.setProfilerStep(server_settings.total_memory_profiler_step);
         }
 
-        double total_memory_tracker_sample_probability = config().getDouble("total_memory_tracker_sample_probability", 0);
-        if (total_memory_tracker_sample_probability > 0.0)
+        if (server_settings.total_memory_tracker_sample_probability > 0.0)
         {
-            total_memory_tracker.setSampleProbability(total_memory_tracker_sample_probability);
+            total_memory_tracker.setSampleProbability(server_settings.total_memory_tracker_sample_probability);
         }
+
+        if (server_settings.total_memory_profiler_sample_min_allocation_size)
+        {
+            total_memory_tracker.setSampleMinAllocationSize(server_settings.total_memory_profiler_sample_min_allocation_size);
+        }
+
+        if (server_settings.total_memory_profiler_sample_max_allocation_size)
+        {
+            total_memory_tracker.setSampleMaxAllocationSize(server_settings.total_memory_profiler_sample_max_allocation_size);
+        }
+
     }
 #endif
 
diff --git a/src/Common/MemoryTracker.cpp b/src/Common/MemoryTracker.cpp
index 81cac2617c5..52cd9cc8073 100644
--- a/src/Common/MemoryTracker.cpp
+++ b/src/Common/MemoryTracker.cpp
@@ -229,7 +229,7 @@ void MemoryTracker::allocImpl(Int64 size, bool throw_if_memory_exceeded, MemoryT
     }
 
     std::bernoulli_distribution sample(sample_probability);
-    if (unlikely(sample_probability > 0.0 && sample(thread_local_rng)))
+    if (unlikely(sample_probability > 0.0 && isSizeOkForSampling(size) && sample(thread_local_rng)))
     {
         MemoryTrackerBlockerInThread untrack_lock(VariableContext::Global);
         DB::TraceSender::send(DB::TraceType::MemorySample, StackTrace(), {.size = size});
@@ -413,7 +413,7 @@ void MemoryTracker::free(Int64 size)
     }
 
     std::bernoulli_distribution sample(sample_probability);
-    if (unlikely(sample_probability > 0.0 && sample(thread_local_rng)))
+    if (unlikely(sample_probability > 0.0 && isSizeOkForSampling(size) && sample(thread_local_rng)))
     {
         MemoryTrackerBlockerInThread untrack_lock(VariableContext::Global);
         DB::TraceSender::send(DB::TraceType::MemorySample, StackTrace(), {.size = -size});
@@ -534,6 +534,13 @@ void MemoryTracker::setOrRaiseProfilerLimit(Int64 value)
         ;
 }
 
+bool MemoryTracker::isSizeOkForSampling(UInt64 size) const
+{
+    //LOG_DEBUG(&Poco::Logger::get("MemoryTracker"), "CHECKING SIZE {} IN BORDERS [{}; {}]", size, min_allocation_size_bytes, max_allocation_size_bytes);
+    /// We can avoid comparison min_allocation_size_bytes with zero, because we cannot have 0 bytes allocation/deallocation
+    return ((max_allocation_size_bytes == 0 || size <= max_allocation_size_bytes) && size >= min_allocation_size_bytes);
+}
+
 bool canEnqueueBackgroundTask()
 {
     auto limit = background_memory_tracker.getSoftLimit();
diff --git a/src/Common/MemoryTracker.h b/src/Common/MemoryTracker.h
index 4e29d40c953..768dc8a7404 100644
--- a/src/Common/MemoryTracker.h
+++ b/src/Common/MemoryTracker.h
@@ -67,6 +67,12 @@ private:
     /// To randomly sample allocations and deallocations in trace_log.
     double sample_probability = 0;
 
+    /// Randomly sample allocations only larger or equal to this size
+    UInt64 min_allocation_size_bytes = 0;
+
+    /// Randomly sample allocations only smaller or equal to this size
+    UInt64 max_allocation_size_bytes = 0;
+
     /// Singly-linked list. All information will be passed to subsequent memory trackers also (it allows to implement trackers hierarchy).
     /// In terms of tree nodes it is the list of parents. Lifetime of these trackers should "include" lifetime of current tracker.
     std::atomic<MemoryTracker *> parent {};
@@ -88,6 +94,8 @@ private:
 
     void setOrRaiseProfilerLimit(Int64 value);
 
+    bool isSizeOkForSampling(UInt64 size) const;
+
     /// allocImpl(...) and free(...) should not be used directly
     friend struct CurrentMemoryTracker;
     void allocImpl(Int64 size, bool throw_if_memory_exceeded, MemoryTracker * query_tracker = nullptr);
@@ -165,6 +173,16 @@ public:
         sample_probability = value;
     }
 
+    void setSampleMinAllocationSize(UInt64 value)
+    {
+        min_allocation_size_bytes = value;
+    }
+
+    void setSampleMaxAllocationSize(UInt64 value)
+    {
+        max_allocation_size_bytes = value;
+    }
+
     void setProfilerStep(Int64 value)
     {
         profiler_step = value;
diff --git a/src/Core/ServerSettings.h b/src/Core/ServerSettings.h
index 1a9f226041b..f7a6c9e950e 100644
--- a/src/Core/ServerSettings.h
+++ b/src/Core/ServerSettings.h
@@ -81,8 +81,12 @@ namespace DB
     M(UInt64, background_schedule_pool_size, 128, "The maximum number of threads that will be used for constantly executing some lightweight periodic operations.", 0) \
     M(UInt64, background_message_broker_schedule_pool_size, 16, "The maximum number of threads that will be used for executing background operations for message streaming.", 0) \
     M(UInt64, background_distributed_schedule_pool_size, 16, "The maximum number of threads that will be used for executing distributed sends.", 0) \
-    M(Bool, display_secrets_in_show_and_select, false, "Allow showing secrets in SHOW and SELECT queries via a format setting and a grant", 0)
-
+    M(Bool, display_secrets_in_show_and_select, false, "Allow showing secrets in SHOW and SELECT queries via a format setting and a grant", 0) \
+    \
+    M(UInt64, total_memory_profiler_step, 0, "Whenever server memory usage becomes larger than every next step in number of bytes the memory profiler will collect the allocating stack trace. Zero means disabled memory profiler. Values lower than a few megabytes will slow down server.", 0) \
+    M(Double, total_memory_tracker_sample_probability, 0, "Collect random allocations and deallocations and write them into system.trace_log with 'MemorySample' trace_type. The probability is for every alloc/free regardless to the size of the allocation (can be changed with `memory_profiler_sample_min_allocation_size` and `memory_profiler_sample_max_allocation_size`). Note that sampling happens only when the amount of untracked memory exceeds 'max_untracked_memory'. You may want to set 'max_untracked_memory' to 0 for extra fine grained sampling.", 0) \
+    M(UInt64, total_memory_profiler_sample_min_allocation_size, 0, "Collect random allocations of size greater or equal than specified value with probability equal to `total_memory_profiler_sample_probability`. 0 means disabled. You may want to set 'max_untracked_memory' to 0 to make this threshold to work as expected.", 0) \
+    M(UInt64, total_memory_profiler_sample_max_allocation_size, 0, "Collect random allocations of size less or equal than specified value with probability equal to `total_memory_profiler_sample_probability`. 0 means disabled. You may want to set 'max_untracked_memory' to 0 to make this threshold to work as expected.", 0)
 
 DECLARE_SETTINGS_TRAITS(ServerSettingsTraits, SERVER_SETTINGS)
 
diff --git a/src/Core/Settings.h b/src/Core/Settings.h
index 6fb26994d2f..bcfc179be5e 100644
--- a/src/Core/Settings.h
+++ b/src/Core/Settings.h
@@ -426,7 +426,9 @@ class IColumn;
     M(UInt64, memory_overcommit_ratio_denominator_for_user, 1_GiB, "It represents soft memory limit on the global level. This value is used to compute query overcommit ratio.", 0) \
     M(UInt64, max_untracked_memory, (4 * 1024 * 1024), "Small allocations and deallocations are grouped in thread local variable and tracked or profiled only when amount (in absolute value) becomes larger than specified value. If the value is higher than 'memory_profiler_step' it will be effectively lowered to 'memory_profiler_step'.", 0) \
     M(UInt64, memory_profiler_step, (4 * 1024 * 1024), "Whenever query memory usage becomes larger than every next step in number of bytes the memory profiler will collect the allocating stack trace. Zero means disabled memory profiler. Values lower than a few megabytes will slow down query processing.", 0) \
-    M(Float, memory_profiler_sample_probability, 0., "Collect random allocations and deallocations and write them into system.trace_log with 'MemorySample' trace_type. The probability is for every alloc/free regardless to the size of the allocation. Note that sampling happens only when the amount of untracked memory exceeds 'max_untracked_memory'. You may want to set 'max_untracked_memory' to 0 for extra fine grained sampling.", 0) \
+    M(Float, memory_profiler_sample_probability, 0., "Collect random allocations and deallocations and write them into system.trace_log with 'MemorySample' trace_type. The probability is for every alloc/free regardless to the size of the allocation (can be changed with `memory_profiler_sample_min_allocation_size` and `memory_profiler_sample_max_allocation_size`). Note that sampling happens only when the amount of untracked memory exceeds 'max_untracked_memory'. You may want to set 'max_untracked_memory' to 0 for extra fine grained sampling.", 0) \
+    M(UInt64, memory_profiler_sample_min_allocation_size, 0, "Collect random allocations of size greater or equal than specified value with probability equal to `memory_profiler_sample_probability`. 0 means disabled. You may want to set 'max_untracked_memory' to 0 to make this threshold to work as expected.", 0) \
+    M(UInt64, memory_profiler_sample_max_allocation_size, 0, "Collect random allocations of size less or equal than specified value with probability equal to `memory_profiler_sample_probability`. 0 means disabled. You may want to set 'max_untracked_memory' to 0 to make this threshold to work as expected.", 0) \
     M(Bool, trace_profile_events, false, "Send to system.trace_log profile event and value of increment on each increment with 'ProfileEvent' trace_type", 0) \
     \
     M(UInt64, memory_usage_overcommit_max_wait_microseconds, 5'000'000, "Maximum time thread will wait for memory to be freed in the case of memory overcommit. If timeout is reached and memory is not freed, exception is thrown.", 0) \
diff --git a/src/Interpreters/ProcessList.cpp b/src/Interpreters/ProcessList.cpp
index 1503e396298..c299572ef41 100644
--- a/src/Interpreters/ProcessList.cpp
+++ b/src/Interpreters/ProcessList.cpp
@@ -223,7 +223,10 @@ ProcessList::insert(const String & query_, const IAST * ast, ContextMutablePtr q
             {
                 /// Set up memory profiling
                 thread_group->memory_tracker.setProfilerStep(settings.memory_profiler_step);
+
                 thread_group->memory_tracker.setSampleProbability(settings.memory_profiler_sample_probability);
+                thread_group->memory_tracker.setSampleMinAllocationSize(settings.memory_profiler_sample_min_allocation_size);
+                thread_group->memory_tracker.setSampleMaxAllocationSize(settings.memory_profiler_sample_max_allocation_size);
                 thread_group->performance_counters.setTraceProfileEvents(settings.trace_profile_events);
             }
 
diff --git a/src/Interpreters/ThreadStatusExt.cpp b/src/Interpreters/ThreadStatusExt.cpp
index 5acfe500b1d..49d9d3ccdf6 100644
--- a/src/Interpreters/ThreadStatusExt.cpp
+++ b/src/Interpreters/ThreadStatusExt.cpp
@@ -83,6 +83,8 @@ ThreadGroupPtr ThreadGroup::createForBackgroundProcess(ContextPtr storage_contex
     const Settings & settings = storage_context->getSettingsRef();
     group->memory_tracker.setProfilerStep(settings.memory_profiler_step);
     group->memory_tracker.setSampleProbability(settings.memory_profiler_sample_probability);
+    group->memory_tracker.setSampleMinAllocationSize(settings.memory_profiler_sample_min_allocation_size);
+    group->memory_tracker.setSampleMaxAllocationSize(settings.memory_profiler_sample_max_allocation_size);
     group->memory_tracker.setSoftLimit(settings.memory_overcommit_ratio_denominator);
     group->memory_tracker.setParent(&background_memory_tracker);
     if (settings.memory_tracker_fault_probability > 0.0)
diff --git a/tests/integration/test_memory_profiler_min_max_borders/__init__.py b/tests/integration/test_memory_profiler_min_max_borders/__init__.py
new file mode 100644
index 00000000000..e5a0d9b4834
--- /dev/null
+++ b/tests/integration/test_memory_profiler_min_max_borders/__init__.py
@@ -0,0 +1 @@
+#!/usr/bin/env python3
diff --git a/tests/integration/test_memory_profiler_min_max_borders/configs/max_untracked_memory.xml b/tests/integration/test_memory_profiler_min_max_borders/configs/max_untracked_memory.xml
new file mode 100644
index 00000000000..56fc5ed34ca
--- /dev/null
+++ b/tests/integration/test_memory_profiler_min_max_borders/configs/max_untracked_memory.xml
@@ -0,0 +1,7 @@
+<clickhouse>
+    <profiles>
+        <default>
+            <max_untracked_memory>1</max_untracked_memory>
+        </default>
+    </profiles>
+</clickhouse>
diff --git a/tests/integration/test_memory_profiler_min_max_borders/configs/memory_profiler.xml b/tests/integration/test_memory_profiler_min_max_borders/configs/memory_profiler.xml
new file mode 100644
index 00000000000..5b3e17d145f
--- /dev/null
+++ b/tests/integration/test_memory_profiler_min_max_borders/configs/memory_profiler.xml
@@ -0,0 +1,5 @@
+<clickhouse>
+    <total_memory_tracker_sample_probability>1</total_memory_tracker_sample_probability>
+    <total_memory_profiler_sample_min_allocation_size>4096</total_memory_profiler_sample_min_allocation_size>
+    <total_memory_profiler_sample_max_allocation_size>8192</total_memory_profiler_sample_max_allocation_size>
+</clickhouse>
diff --git a/tests/integration/test_memory_profiler_min_max_borders/test.py b/tests/integration/test_memory_profiler_min_max_borders/test.py
new file mode 100644
index 00000000000..b768a442591
--- /dev/null
+++ b/tests/integration/test_memory_profiler_min_max_borders/test.py
@@ -0,0 +1,27 @@
+from helpers.cluster import ClickHouseCluster
+import pytest
+
+cluster = ClickHouseCluster(__file__)
+node = cluster.add_instance(
+    "node",
+    main_configs=["configs/memory_profiler.xml"],
+    user_configs=["configs/max_untracked_memory.xml"],
+)
+
+
+@pytest.fixture(scope="module")
+def started_cluster():
+    try:
+        cluster.start()
+        yield cluster
+
+    finally:
+        cluster.shutdown()
+
+
+def test_trace_boundaries_work(started_cluster):
+    node.query("select randomPrintableASCII(number) from numbers(1000) FORMAT Null")
+    node.query("SYSTEM FLUSH LOGS")
+
+    assert node.query("SELECT countDistinct(abs(size)) > 0 FROM system.trace_log where trace_type = 'MemorySample'") == "1\n"
+    assert node.query("SELECT count() FROM system.trace_log where trace_type = 'MemorySample' and (abs(size) > 8192 or abs(size) < 4096)") == "0\n"
diff --git a/tests/queries/0_stateless/02818_memory_profiler_sample_min_max_allocation_size.reference b/tests/queries/0_stateless/02818_memory_profiler_sample_min_max_allocation_size.reference
new file mode 100644
index 00000000000..d00491fd7e5
--- /dev/null
+++ b/tests/queries/0_stateless/02818_memory_profiler_sample_min_max_allocation_size.reference
@@ -0,0 +1 @@
+1
diff --git a/tests/queries/0_stateless/02818_memory_profiler_sample_min_max_allocation_size.sh b/tests/queries/0_stateless/02818_memory_profiler_sample_min_max_allocation_size.sh
new file mode 100755
index 00000000000..b1fbea26da7
--- /dev/null
+++ b/tests/queries/0_stateless/02818_memory_profiler_sample_min_max_allocation_size.sh
@@ -0,0 +1,18 @@
+#!/usr/bin/env bash
+# Tags: no-tsan, no-asan, no-ubsan, no-msan, no-cpu-aarch64, no-random-settings
+# requires TraceCollector, does not available under sanitizers and aarch64
+
+CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+# shellcheck source=../shell_config.sh
+. "$CURDIR"/../shell_config.sh
+
+query_id="${CLICKHOUSE_DATABASE}_min_max_allocation_size_$RANDOM$RANDOM"
+${CLICKHOUSE_CLIENT} --query_id="$query_id" --memory_profiler_sample_min_allocation_size=4096 --memory_profiler_sample_max_allocation_size=8192 --log_queries=1 --max_threads=1 --max_untracked_memory=0 --memory_profiler_sample_probability=1 --query "select randomPrintableASCII(number) from numbers(1000) FORMAT Null"
+
+${CLICKHOUSE_CLIENT} --query "SYSTEM FLUSH LOGS"
+
+# at least something allocated
+${CLICKHOUSE_CLIENT} --query "SELECT countDistinct(abs(size)) > 0 FROM system.trace_log where query_id='$query_id' and trace_type = 'MemorySample'"
+
+# show wrong allocations
+${CLICKHOUSE_CLIENT} --query "SELECT abs(size) FROM system.trace_log where query_id='$query_id' and trace_type = 'MemorySample' and (abs(size) > 8192 or abs(size) < 4096)"

From 8d4c840e2da0401787bafa2239907ff59160a003 Mon Sep 17 00:00:00 2001
From: Alexander Gololobov <440544+davenger@users.noreply.github.com>
Date: Fri, 21 Jul 2023 15:15:35 +0200
Subject: [PATCH 160/242] Some more cases

---
 tests/queries/0_stateless/01590_countSubstrings.reference | 3 +++
 tests/queries/0_stateless/01590_countSubstrings.sql       | 3 +++
 2 files changed, 6 insertions(+)

diff --git a/tests/queries/0_stateless/01590_countSubstrings.reference b/tests/queries/0_stateless/01590_countSubstrings.reference
index 95031cd3856..367b910e569 100644
--- a/tests/queries/0_stateless/01590_countSubstrings.reference
+++ b/tests/queries/0_stateless/01590_countSubstrings.reference
@@ -9,6 +9,9 @@ empty
 0
 0
 0
+0
+0
+0
 char
 1
 2
diff --git a/tests/queries/0_stateless/01590_countSubstrings.sql b/tests/queries/0_stateless/01590_countSubstrings.sql
index 6d2d87b1260..b38cbb7d188 100644
--- a/tests/queries/0_stateless/01590_countSubstrings.sql
+++ b/tests/queries/0_stateless/01590_countSubstrings.sql
@@ -14,6 +14,9 @@ select countSubstrings('', '');
 select countSubstrings('.', '');
 select countSubstrings(toString(number), '') from numbers(1);
 select countSubstrings('', toString(number)) from numbers(1);
+select countSubstrings('aaa', materialize(''));
+select countSubstrings(materialize('aaa'), '');
+select countSubstrings(materialize('aaa'), materialize(''));
 
 select 'char';
 select countSubstrings('foobar.com', '.');

From cb53d762eae54eef9411e2cf7548927f83fe187b Mon Sep 17 00:00:00 2001
From: Alexander Gololobov <440544+davenger@users.noreply.github.com>
Date: Fri, 21 Jul 2023 15:23:24 +0200
Subject: [PATCH 161/242] Fix one more case

---
 src/Functions/CountSubstringsImpl.h | 21 ++++++++++++---------
 1 file changed, 12 insertions(+), 9 deletions(-)

diff --git a/src/Functions/CountSubstringsImpl.h b/src/Functions/CountSubstringsImpl.h
index 1501e50afcf..8ba9ee99de8 100644
--- a/src/Functions/CountSubstringsImpl.h
+++ b/src/Functions/CountSubstringsImpl.h
@@ -226,16 +226,19 @@ struct CountSubstringsImpl
                 const char * needle_beg = reinterpret_cast<const char *>(&needle_data[prev_needle_offset]);
                 size_t needle_size = needle_offsets[i] - prev_needle_offset - 1;
 
-                typename Impl::SearcherInSmallHaystack searcher = Impl::createSearcherInSmallHaystack(needle_beg, needle_size);
-
-                const UInt8 * end = reinterpret_cast<const UInt8 *>(haystack.data() + haystack.size());
-                const UInt8 * beg = reinterpret_cast<const UInt8 *>(Impl::advancePos(haystack.data(), reinterpret_cast<const char *>(end), start - 1));
-
-                const UInt8 * pos;
-                while ((pos = searcher.search(beg, end)) < end)
+                if (needle_size > 0)
                 {
-                    ++res[i];
-                    beg = pos + needle_size;
+                    typename Impl::SearcherInSmallHaystack searcher = Impl::createSearcherInSmallHaystack(needle_beg, needle_size);
+
+                    const UInt8 * end = reinterpret_cast<const UInt8 *>(haystack.data() + haystack.size());
+                    const UInt8 * beg = reinterpret_cast<const UInt8 *>(Impl::advancePos(haystack.data(), reinterpret_cast<const char *>(end), start - 1));
+
+                    const UInt8 * pos;
+                    while ((pos = searcher.search(beg, end)) < end)
+                    {
+                        ++res[i];
+                        beg = pos + needle_size;
+                    }
                 }
             }
 

From abd8bfed2b6e6c20b46ffbeb82699c8530523ffe Mon Sep 17 00:00:00 2001
From: Alexander Sapin <alesapin@gmail.com>
Date: Fri, 21 Jul 2023 15:44:49 +0200
Subject: [PATCH 162/242] Remove comment

---
 src/Common/MemoryTracker.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/Common/MemoryTracker.cpp b/src/Common/MemoryTracker.cpp
index 52cd9cc8073..52cae0768dc 100644
--- a/src/Common/MemoryTracker.cpp
+++ b/src/Common/MemoryTracker.cpp
@@ -536,7 +536,6 @@ void MemoryTracker::setOrRaiseProfilerLimit(Int64 value)
 
 bool MemoryTracker::isSizeOkForSampling(UInt64 size) const
 {
-    //LOG_DEBUG(&Poco::Logger::get("MemoryTracker"), "CHECKING SIZE {} IN BORDERS [{}; {}]", size, min_allocation_size_bytes, max_allocation_size_bytes);
     /// We can avoid comparison min_allocation_size_bytes with zero, because we cannot have 0 bytes allocation/deallocation
     return ((max_allocation_size_bytes == 0 || size <= max_allocation_size_bytes) && size >= min_allocation_size_bytes);
 }

From c080e9b450faeaced13c149212456ab006648c3a Mon Sep 17 00:00:00 2001
From: Amos Bird <amosbird@gmail.com>
Date: Fri, 21 Jul 2023 21:48:49 +0800
Subject: [PATCH 163/242] Fix normal projection with merge table

---
 .../Optimizations/optimizeUseNormalProjection.cpp     |  8 ++++++--
 ..._projection_query_plan_optimization_misc.reference |  1 +
 .../01710_projection_query_plan_optimization_misc.sql | 11 +++++++++++
 3 files changed, 18 insertions(+), 2 deletions(-)
 create mode 100644 tests/queries/0_stateless/01710_projection_query_plan_optimization_misc.reference
 create mode 100644 tests/queries/0_stateless/01710_projection_query_plan_optimization_misc.sql

diff --git a/src/Processors/QueryPlan/Optimizations/optimizeUseNormalProjection.cpp b/src/Processors/QueryPlan/Optimizations/optimizeUseNormalProjection.cpp
index dd7a5d449bc..2a03a082d89 100644
--- a/src/Processors/QueryPlan/Optimizations/optimizeUseNormalProjection.cpp
+++ b/src/Processors/QueryPlan/Optimizations/optimizeUseNormalProjection.cpp
@@ -92,6 +92,10 @@ bool optimizeUseNormalProjections(Stack & stack, QueryPlan::Nodes & nodes)
             break;
     }
 
+    /// Dangling query plan node. This might be generated by StorageMerge.
+    if (iter->node->step.get() == reading)
+        return false;
+
     const auto metadata = reading->getStorageMetadata();
     const auto & projections = metadata->projections;
 
@@ -105,8 +109,8 @@ bool optimizeUseNormalProjections(Stack & stack, QueryPlan::Nodes & nodes)
 
     QueryDAG query;
     {
-        auto & clild = iter->node->children[iter->next_child - 1];
-        if (!query.build(*clild))
+        auto & child = iter->node->children[iter->next_child - 1];
+        if (!query.build(*child))
             return false;
 
         if (query.dag)
diff --git a/tests/queries/0_stateless/01710_projection_query_plan_optimization_misc.reference b/tests/queries/0_stateless/01710_projection_query_plan_optimization_misc.reference
new file mode 100644
index 00000000000..9874d6464ab
--- /dev/null
+++ b/tests/queries/0_stateless/01710_projection_query_plan_optimization_misc.reference
@@ -0,0 +1 @@
+1	2
diff --git a/tests/queries/0_stateless/01710_projection_query_plan_optimization_misc.sql b/tests/queries/0_stateless/01710_projection_query_plan_optimization_misc.sql
new file mode 100644
index 00000000000..cb565313380
--- /dev/null
+++ b/tests/queries/0_stateless/01710_projection_query_plan_optimization_misc.sql
@@ -0,0 +1,11 @@
+drop table if exists t;
+
+create table t (x Int32, codectest Int32) engine = MergeTree order by x;
+
+alter table t add projection x (select * order by codectest);
+
+insert into t values (1, 2);
+
+select * from merge('', 't');
+
+drop table t;

From 2a6b96f9e339e602c59968741741e57b1675bf52 Mon Sep 17 00:00:00 2001
From: robot-clickhouse <robot-clickhouse@users.noreply.github.com>
Date: Fri, 21 Jul 2023 13:51:40 +0000
Subject: [PATCH 164/242] Automatic style fix

---
 .../test_memory_profiler_min_max_borders/test.py   | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/tests/integration/test_memory_profiler_min_max_borders/test.py b/tests/integration/test_memory_profiler_min_max_borders/test.py
index b768a442591..6ab971fa9c4 100644
--- a/tests/integration/test_memory_profiler_min_max_borders/test.py
+++ b/tests/integration/test_memory_profiler_min_max_borders/test.py
@@ -23,5 +23,15 @@ def test_trace_boundaries_work(started_cluster):
     node.query("select randomPrintableASCII(number) from numbers(1000) FORMAT Null")
     node.query("SYSTEM FLUSH LOGS")
 
-    assert node.query("SELECT countDistinct(abs(size)) > 0 FROM system.trace_log where trace_type = 'MemorySample'") == "1\n"
-    assert node.query("SELECT count() FROM system.trace_log where trace_type = 'MemorySample' and (abs(size) > 8192 or abs(size) < 4096)") == "0\n"
+    assert (
+        node.query(
+            "SELECT countDistinct(abs(size)) > 0 FROM system.trace_log where trace_type = 'MemorySample'"
+        )
+        == "1\n"
+    )
+    assert (
+        node.query(
+            "SELECT count() FROM system.trace_log where trace_type = 'MemorySample' and (abs(size) > 8192 or abs(size) < 4096)"
+        )
+        == "0\n"
+    )

From 3acb6005f041051b7c00c48df5035843744a7e24 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= <git@rmr.ninja>
Date: Fri, 21 Jul 2023 17:08:01 +0200
Subject: [PATCH 165/242] Reduce the number of syscalls in
 FileCache::loadMetadata

---
 src/Interpreters/Cache/FileCache.cpp | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/src/Interpreters/Cache/FileCache.cpp b/src/Interpreters/Cache/FileCache.cpp
index 91d1c63e832..42cc7b80a66 100644
--- a/src/Interpreters/Cache/FileCache.cpp
+++ b/src/Interpreters/Cache/FileCache.cpp
@@ -870,13 +870,12 @@ void FileCache::loadMetadata()
     }
 
     size_t total_size = 0;
-    for (auto key_prefix_it = fs::directory_iterator{metadata.getBaseDirectory()};
-         key_prefix_it != fs::directory_iterator();)
+    for (auto key_prefix_it = fs::directory_iterator{metadata.getBaseDirectory()}; key_prefix_it != fs::directory_iterator();
+         key_prefix_it++)
     {
         const fs::path key_prefix_directory = key_prefix_it->path();
-        key_prefix_it++;
 
-        if (!fs::is_directory(key_prefix_directory))
+        if (!key_prefix_it->is_directory())
         {
             if (key_prefix_directory.filename() != "status")
             {
@@ -887,19 +886,19 @@ void FileCache::loadMetadata()
             continue;
         }
 
-        if (fs::is_empty(key_prefix_directory))
+        fs::directory_iterator key_it{key_prefix_directory};
+        if (key_it == fs::directory_iterator{})
         {
             LOG_DEBUG(log, "Removing empty key prefix directory: {}", key_prefix_directory.string());
             fs::remove(key_prefix_directory);
             continue;
         }
 
-        for (fs::directory_iterator key_it{key_prefix_directory}; key_it != fs::directory_iterator();)
+        for (/* key_it already initialized to verify emptiness */; key_it != fs::directory_iterator(); key_it++)
         {
             const fs::path key_directory = key_it->path();
-            ++key_it;
 
-            if (!fs::is_directory(key_directory))
+            if (!key_it->is_directory())
             {
                 LOG_DEBUG(
                     log,
@@ -908,7 +907,7 @@ void FileCache::loadMetadata()
                 continue;
             }
 
-            if (fs::is_empty(key_directory))
+            if (fs::directory_iterator{key_directory} == fs::directory_iterator{})
             {
                 LOG_DEBUG(log, "Removing empty key directory: {}", key_directory.string());
                 fs::remove(key_directory);

From 6c8d5ca0a554ecc4fee32269858797d139f3c02a Mon Sep 17 00:00:00 2001
From: Igor Nikonov <igor@clickhouse.com>
Date: Fri, 21 Jul 2023 21:33:51 +0000
Subject: [PATCH 166/242] Fix: remove redundant distinct with views

---
 src/Interpreters/ActionsDAG.cpp               | 18 +++++++++++----
 ...x_remove_dedundant_distinct_view.reference | 13 +++++++++++
 ...810_fix_remove_dedundant_distinct_view.sql | 22 +++++++++++++++++++
 3 files changed, 49 insertions(+), 4 deletions(-)
 create mode 100644 tests/queries/0_stateless/02810_fix_remove_dedundant_distinct_view.reference
 create mode 100644 tests/queries/0_stateless/02810_fix_remove_dedundant_distinct_view.sql

diff --git a/src/Interpreters/ActionsDAG.cpp b/src/Interpreters/ActionsDAG.cpp
index 906875dd314..ce273e78ff3 100644
--- a/src/Interpreters/ActionsDAG.cpp
+++ b/src/Interpreters/ActionsDAG.cpp
@@ -2511,11 +2511,21 @@ FindOriginalNodeForOutputName::FindOriginalNodeForOutputName(const ActionsDAGPtr
         /// find input node which refers to the output node
         /// consider only aliases on the path
         const auto * node = output_node;
-        while (node && node->type == ActionsDAG::ActionType::ALIAS)
+        while (node)
         {
-            /// alias has only one child
-            chassert(node->children.size() == 1);
-            node = node->children.front();
+            if (node->type == ActionsDAG::ActionType::ALIAS)
+            {
+                node = node->children.front();
+            }
+            /// materiailze can occure when dealing with views, special case
+            /// TODO: not sure if it should be done here, looks too generic place
+            else if (node->type == ActionsDAG::ActionType::FUNCTION && node->function_base->getName() == "materialize")
+            {
+                chassert(node->children.size() == 1);
+                node = node->children.front();
+            }
+            else
+                break;
         }
         if (node && node->type == ActionsDAG::ActionType::INPUT)
             index.emplace(output_node->result_name, node);
diff --git a/tests/queries/0_stateless/02810_fix_remove_dedundant_distinct_view.reference b/tests/queries/0_stateless/02810_fix_remove_dedundant_distinct_view.reference
new file mode 100644
index 00000000000..01f14f82e94
--- /dev/null
+++ b/tests/queries/0_stateless/02810_fix_remove_dedundant_distinct_view.reference
@@ -0,0 +1,13 @@
+-- { echoOn }
+set query_plan_remove_redundant_distinct=1;
+-- DISTINCT has to be removed since the view already has DISTINCT on the same column
+SELECT count()
+FROM
+(
+    EXPLAIN SELECT DISTINCT x FROM tab_v
+)
+WHERE explain ILIKE '%distinct%';
+2
+SELECT DISTINCT x FROM tab_v;
+2
+1
diff --git a/tests/queries/0_stateless/02810_fix_remove_dedundant_distinct_view.sql b/tests/queries/0_stateless/02810_fix_remove_dedundant_distinct_view.sql
new file mode 100644
index 00000000000..99fc24dae8b
--- /dev/null
+++ b/tests/queries/0_stateless/02810_fix_remove_dedundant_distinct_view.sql
@@ -0,0 +1,22 @@
+set allow_experimental_analyzer=1;
+
+drop table if exists tab_v;
+drop table if exists tab;
+create table tab (x UInt64, y UInt64) engine MergeTree() order by (x, y);
+insert into tab values(1, 1);
+insert into tab values(1, 2);
+insert into tab values(2, 1);
+
+create view tab_v as select distinct(x) from tab;
+
+-- { echoOn }
+set query_plan_remove_redundant_distinct=1;
+-- DISTINCT has to be removed since the view already has DISTINCT on the same column
+SELECT count()
+FROM
+(
+    EXPLAIN SELECT DISTINCT x FROM tab_v
+)
+WHERE explain ILIKE '%distinct%';
+
+SELECT DISTINCT x FROM tab_v;

From c6ffc9f266f1bb8a667a3d5beff9bd47a288ef74 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@clickhouse.com>
Date: Sat, 22 Jul 2023 01:05:39 +0300
Subject: [PATCH 167/242] Update 02815_fix_not_found_constants_col_in_block.sql

---
 .../0_stateless/02815_fix_not_found_constants_col_in_block.sql   | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/queries/0_stateless/02815_fix_not_found_constants_col_in_block.sql b/tests/queries/0_stateless/02815_fix_not_found_constants_col_in_block.sql
index c56d59c72d6..fa784cf12e3 100644
--- a/tests/queries/0_stateless/02815_fix_not_found_constants_col_in_block.sql
+++ b/tests/queries/0_stateless/02815_fix_not_found_constants_col_in_block.sql
@@ -3,3 +3,4 @@ CREATE TABLE t0 (vkey UInt32, c0 Float32, primary key(c0)) engine = AggregatingM
 insert into t0 values (19000, 1);
 select null as c_2_0, ref_2.c0 as c_2_1, ref_2.vkey as c_2_2 from t0 as ref_2 order by c_2_0 asc, c_2_1 asc, c_2_2 asc;
 select null as c_2_0, ref_2.c0 as c_2_1, ref_2.vkey as c_2_2 from t0 as ref_2 order by c_2_0 asc, c_2_1 asc;
+DROP TABLE t0;

From 687cbc57bba42a67b62b1b717e51a5be7e14b733 Mon Sep 17 00:00:00 2001
From: Igor Nikonov <igor@clickhouse.com>
Date: Fri, 21 Jul 2023 22:15:02 +0000
Subject: [PATCH 168/242] Fix typo

---
 src/Interpreters/ActionsDAG.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Interpreters/ActionsDAG.cpp b/src/Interpreters/ActionsDAG.cpp
index ce273e78ff3..284c42b658a 100644
--- a/src/Interpreters/ActionsDAG.cpp
+++ b/src/Interpreters/ActionsDAG.cpp
@@ -2517,7 +2517,7 @@ FindOriginalNodeForOutputName::FindOriginalNodeForOutputName(const ActionsDAGPtr
             {
                 node = node->children.front();
             }
-            /// materiailze can occure when dealing with views, special case
+            /// materialize() function can occur when dealing with views
             /// TODO: not sure if it should be done here, looks too generic place
             else if (node->type == ActionsDAG::ActionType::FUNCTION && node->function_base->getName() == "materialize")
             {

From 9a5aed35e24a9aa4d7de71971665449cf344f917 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@clickhouse.com>
Date: Sat, 22 Jul 2023 02:33:44 +0200
Subject: [PATCH 169/242] Add a note about potential caveats for the
 "session_timezone" setting

---
 src/Core/Settings.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Core/Settings.h b/src/Core/Settings.h
index cfcb56729d2..f267fa15276 100644
--- a/src/Core/Settings.h
+++ b/src/Core/Settings.h
@@ -775,7 +775,7 @@ class IColumn;
     M(Bool, allow_experimental_undrop_table_query, false, "Allow to use undrop query to restore dropped table in a limited time", 0) \
     M(Bool, keeper_map_strict_mode, false, "Enforce additional checks during operations on KeeperMap. E.g. throw an exception on an insert for already existing key", 0) \
     M(UInt64, extract_kvp_max_pairs_per_row, 1000, "Max number pairs that can be produced by extractKeyValuePairs function. Used to safeguard against consuming too much memory.", 0) \
-    M(Timezone, session_timezone, "", "The default timezone for current session or query. The server default timezone if empty.", 0) \
+    M(Timezone, session_timezone, "", "This setting can be removed in the future due to potential caveats. It is experimental and is not suitable for production usage. The default timezone for current session or query. The server default timezone if empty.", 0) \
     M(Bool, allow_create_index_without_type, false, "Allow CREATE INDEX query without TYPE. Query will be ignored. Made for SQL compatibility tests.", 0)\
     // End of COMMON_SETTINGS
     // Please add settings related to formats into the FORMAT_FACTORY_SETTINGS and move obsolete settings to OBSOLETE_SETTINGS.

From e68234a231bf234d60ccfa262ca5a2374fb4f98a Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@clickhouse.com>
Date: Sat, 22 Jul 2023 04:45:50 +0300
Subject: [PATCH 170/242] Revert "Re-add SipHash keyed functions"

---
 .../sql-reference/functions/hash-functions.md |   8 +-
 src/Functions/FunctionsHashing.h              | 329 +++++-------------
 src/Functions/FunctionsHashingMisc.cpp        |   5 -
 .../0_stateless/02534_keyed_siphash.reference |  37 --
 .../0_stateless/02534_keyed_siphash.sql       |  61 +---
 .../02552_siphash128_reference.reference      | 151 --------
 .../02552_siphash128_reference.sql            | 253 --------------
 7 files changed, 99 insertions(+), 745 deletions(-)

diff --git a/docs/en/sql-reference/functions/hash-functions.md b/docs/en/sql-reference/functions/hash-functions.md
index 556fe622c27..06097d92480 100644
--- a/docs/en/sql-reference/functions/hash-functions.md
+++ b/docs/en/sql-reference/functions/hash-functions.md
@@ -51,7 +51,7 @@ Calculates the MD5 from a string and returns the resulting set of bytes as Fixed
 If you do not need MD5 in particular, but you need a decent cryptographic 128-bit hash, use the ‘sipHash128’ function instead.
 If you want to get the same result as output by the md5sum utility, use lower(hex(MD5(s))).
 
-## sipHash64 {#hash_functions-siphash64}
+## sipHash64 (#hash_functions-siphash64)
 
 Produces a 64-bit [SipHash](https://en.wikipedia.org/wiki/SipHash) hash value.
 
@@ -63,9 +63,9 @@ This is a cryptographic hash function. It works at least three times faster than
 
 The function [interprets](/docs/en/sql-reference/functions/type-conversion-functions.md/#type_conversion_functions-reinterpretAsString) all the input parameters as strings and calculates the hash value for each of them. It then combines the hashes by the following algorithm:
 
-1. The first and the second hash value are concatenated to an array which is hashed.
-2. The previously calculated hash value and the hash of the third input parameter are hashed in a similar way.
-3. This calculation is repeated for all remaining hash values of the original input.
+1.  The first and the second hash value are concatenated to an array which is hashed.
+2.  The previously calculated hash value and the hash of the third input parameter are hashed in a similar way.
+3.  This calculation is repeated for all remaining hash values of the original input.
 
 **Arguments**
 
diff --git a/src/Functions/FunctionsHashing.h b/src/Functions/FunctionsHashing.h
index 82944630b10..279294b367c 100644
--- a/src/Functions/FunctionsHashing.h
+++ b/src/Functions/FunctionsHashing.h
@@ -79,51 +79,28 @@ namespace impl
         UInt64 key1 = 0;
     };
 
-    struct SipHashKeyColumns
+    static SipHashKey parseSipHashKey(const ColumnWithTypeAndName & key)
     {
-        ColumnPtr key0;
-        ColumnPtr key1;
-        bool is_const;
+        SipHashKey ret{};
 
-        size_t size() const
-        {
-            assert(key0 && key1);
-            assert(key0->size() == key1->size());
-            return key0->size();
-        }
-        SipHashKey getKey(size_t i) const
-        {
-            if (is_const)
-                i = 0;
-            const auto & key0data = assert_cast<const ColumnUInt64 &>(*key0).getData();
-            const auto & key1data = assert_cast<const ColumnUInt64 &>(*key1).getData();
-            return {key0data[i], key1data[i]};
-        }
-    };
-
-    static SipHashKeyColumns parseSipHashKeyColumns(const ColumnWithTypeAndName & key)
-    {
-        const ColumnTuple * tuple = nullptr;
-        const auto * column = key.column.get();
-        bool is_const = false;
-        if (isColumnConst(*column))
-        {
-            is_const = true;
-            tuple = checkAndGetColumnConstData<ColumnTuple>(column);
-        }
-        else
-            tuple = checkAndGetColumn<ColumnTuple>(column);
+        const auto * tuple = checkAndGetColumn<ColumnTuple>(key.column.get());
         if (!tuple)
             throw Exception(ErrorCodes::NOT_IMPLEMENTED, "key must be a tuple");
+
         if (tuple->tupleSize() != 2)
             throw Exception(ErrorCodes::NOT_IMPLEMENTED, "wrong tuple size: key must be a tuple of 2 UInt64");
 
-        SipHashKeyColumns ret{tuple->getColumnPtr(0), tuple->getColumnPtr(1), is_const};
-        assert(ret.key0);
-        if (!checkColumn<ColumnUInt64>(*ret.key0))
+        if (tuple->empty())
+            return ret;
+
+        if (const auto * key0col = checkAndGetColumn<ColumnUInt64>(&(tuple->getColumn(0))))
+            ret.key0 = key0col->get64(0);
+        else
             throw Exception(ErrorCodes::NOT_IMPLEMENTED, "first element of the key tuple is not UInt64");
-        assert(ret.key1);
-        if (!checkColumn<ColumnUInt64>(*ret.key1))
+
+        if (const auto * key1col = checkAndGetColumn<ColumnUInt64>(&(tuple->getColumn(1))))
+            ret.key1 = key1col->get64(0);
+        else
             throw Exception(ErrorCodes::NOT_IMPLEMENTED, "second element of the key tuple is not UInt64");
 
         return ret;
@@ -352,10 +329,8 @@ struct SipHash64KeyedImpl
     static constexpr auto name = "sipHash64Keyed";
     using ReturnType = UInt64;
     using Key = impl::SipHashKey;
-    using KeyColumns = impl::SipHashKeyColumns;
 
-    static KeyColumns parseKeyColumns(const ColumnWithTypeAndName & key) { return impl::parseSipHashKeyColumns(key); }
-    static Key getKey(const KeyColumns & key, size_t i) { return key.getKey(i); }
+    static Key parseKey(const ColumnWithTypeAndName & key) { return impl::parseSipHashKey(key); }
 
     static UInt64 applyKeyed(const Key & key, const char * begin, size_t size) { return sipHash64Keyed(key.key0, key.key1, begin, size); }
 
@@ -396,10 +371,8 @@ struct SipHash128KeyedImpl
     static constexpr auto name = "sipHash128Keyed";
     using ReturnType = UInt128;
     using Key = impl::SipHashKey;
-    using KeyColumns = impl::SipHashKeyColumns;
 
-    static KeyColumns parseKeyColumns(const ColumnWithTypeAndName & key) { return impl::parseSipHashKeyColumns(key); }
-    static Key getKey(const KeyColumns & key, size_t i) { return key.getKey(i); }
+    static Key parseKey(const ColumnWithTypeAndName & key) { return impl::parseSipHashKey(key); }
 
     static UInt128 applyKeyed(const Key & key, const char * begin, size_t size) { return sipHash128Keyed(key.key0, key.key1, begin, size); }
 
@@ -425,43 +398,13 @@ struct SipHash128ReferenceImpl
 
     using ReturnType = UInt128;
 
-    static UInt128 combineHashes(UInt128 h1, UInt128 h2) { return combineHashesFunc<UInt128, SipHash128ReferenceImpl>(h1, h2); }
+    static UInt128 combineHashes(UInt128 h1, UInt128 h2) { return combineHashesFunc<UInt128, SipHash128Impl>(h1, h2); }
 
     static UInt128 apply(const char * data, const size_t size) { return sipHash128Reference(data, size); }
 
     static constexpr bool use_int_hash_for_pods = false;
 };
 
-struct SipHash128ReferenceKeyedImpl
-{
-    static constexpr auto name = "sipHash128ReferenceKeyed";
-    using ReturnType = UInt128;
-    using Key = impl::SipHashKey;
-    using KeyColumns = impl::SipHashKeyColumns;
-
-    static KeyColumns parseKeyColumns(const ColumnWithTypeAndName & key) { return impl::parseSipHashKeyColumns(key); }
-    static Key getKey(const KeyColumns & key, size_t i) { return key.getKey(i); }
-
-    static UInt128 applyKeyed(const Key & key, const char * begin, size_t size)
-    {
-        return sipHash128ReferenceKeyed(key.key0, key.key1, begin, size);
-    }
-
-    static UInt128 combineHashesKeyed(const Key & key, UInt128 h1, UInt128 h2)
-    {
-#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
-        UInt128 tmp;
-        reverseMemcpy(&tmp, &h1, sizeof(UInt128));
-        h1 = tmp;
-        reverseMemcpy(&tmp, &h2, sizeof(UInt128));
-        h2 = tmp;
-#endif
-        UInt128 hashes[] = {h1, h2};
-        return applyKeyed(key, reinterpret_cast<const char *>(hashes), 2 * sizeof(UInt128));
-    }
-
-    static constexpr bool use_int_hash_for_pods = false;
-};
 
 /** Why we need MurmurHash2?
   * MurmurHash2 is an outdated hash function, superseded by MurmurHash3 and subsequently by CityHash, xxHash, HighwayHash.
@@ -1080,7 +1023,7 @@ private:
 
 DECLARE_MULTITARGET_CODE(
 
-template <typename Impl, bool Keyed, typename KeyType, typename KeyColumnsType>
+template <typename Impl, bool Keyed, typename KeyType>
 class FunctionAnyHash : public IFunction
 {
 public:
@@ -1090,12 +1033,9 @@ private:
     using ToType = typename Impl::ReturnType;
 
     template <typename FromType, bool first>
-    void executeIntType(const KeyColumnsType & key_cols, const IColumn * column, typename ColumnVector<ToType>::Container & vec_to) const
+    void executeIntType(const KeyType & key, const IColumn * column, typename ColumnVector<ToType>::Container & vec_to) const
     {
         using ColVecType = ColumnVectorOrDecimal<FromType>;
-        KeyType key{};
-        if constexpr (Keyed)
-            key = Impl::getKey(key_cols, 0);
 
         if (const ColVecType * col_from = checkAndGetColumn<ColVecType>(column))
         {
@@ -1104,9 +1044,6 @@ private:
             for (size_t i = 0; i < size; ++i)
             {
                 ToType hash;
-                if constexpr (Keyed)
-                    if (!key_cols.is_const && i != 0)
-                        key = Impl::getKey(key_cols, i);
 
                 if constexpr (Impl::use_int_hash_for_pods)
                 {
@@ -1140,14 +1077,6 @@ private:
         }
         else if (auto col_from_const = checkAndGetColumnConst<ColVecType>(column))
         {
-            if constexpr (Keyed)
-            {
-                if (!key_cols.is_const)
-                {
-                    ColumnPtr full_column = col_from_const->convertToFullColumn();
-                    return executeIntType<FromType, first>(key_cols, full_column.get(), vec_to);
-                }
-            }
             auto value = col_from_const->template getValue<FromType>();
             ToType hash;
 
@@ -1178,15 +1107,8 @@ private:
             if constexpr (first)
                 vec_to.assign(size, hash);
             else
-            {
                 for (size_t i = 0; i < size; ++i)
-                {
-                    if constexpr (Keyed)
-                        if (!key_cols.is_const && i != 0)
-                            key = Impl::getKey(key_cols, i);
                     vec_to[i] = combineHashes(key, vec_to[i], hash);
-                }
-            }
         }
         else
             throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of argument of function {}",
@@ -1194,12 +1116,9 @@ private:
     }
 
     template <typename FromType, bool first>
-    void executeBigIntType(const KeyColumnsType & key_cols, const IColumn * column, typename ColumnVector<ToType>::Container & vec_to) const
+    void executeBigIntType(const KeyType & key, const IColumn * column, typename ColumnVector<ToType>::Container & vec_to) const
     {
         using ColVecType = ColumnVectorOrDecimal<FromType>;
-        KeyType key{};
-        if constexpr (Keyed)
-            key = Impl::getKey(key_cols, 0);
 
         if (const ColVecType * col_from = checkAndGetColumn<ColVecType>(column))
         {
@@ -1208,9 +1127,6 @@ private:
             for (size_t i = 0; i < size; ++i)
             {
                 ToType hash;
-                if constexpr (Keyed)
-                    if (!key_cols.is_const && i != 0)
-                        key = Impl::getKey(key_cols, i);
                 if constexpr (std::endian::native == std::endian::little)
                     hash = apply(key, reinterpret_cast<const char *>(&vec_from[i]), sizeof(vec_from[i]));
                 else
@@ -1227,14 +1143,6 @@ private:
         }
         else if (auto col_from_const = checkAndGetColumnConst<ColVecType>(column))
         {
-            if constexpr (Keyed)
-            {
-                if (!key_cols.is_const)
-                {
-                    ColumnPtr full_column = col_from_const->convertToFullColumn();
-                    return executeBigIntType<FromType, first>(key_cols, full_column.get(), vec_to);
-                }
-            }
             auto value = col_from_const->template getValue<FromType>();
 
             ToType hash;
@@ -1250,15 +1158,8 @@ private:
             if constexpr (first)
                 vec_to.assign(size, hash);
             else
-            {
                 for (size_t i = 0; i < size; ++i)
-                {
-                    if constexpr (Keyed)
-                        if (!key_cols.is_const && i != 0)
-                            key = Impl::getKey(key_cols, i);
                     vec_to[i] = combineHashes(key, vec_to[i], hash);
-                }
-            }
         }
         else
             throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of argument of function {}",
@@ -1266,16 +1167,10 @@ private:
     }
 
     template <bool first>
-    void executeGeneric(const KeyColumnsType & key_cols, const IColumn * column, typename ColumnVector<ToType>::Container & vec_to) const
+    void executeGeneric(const KeyType & key, const IColumn * column, typename ColumnVector<ToType>::Container & vec_to) const
     {
-        KeyType key{};
-        if constexpr (Keyed)
-            key = Impl::getKey(key_cols, 0);
         for (size_t i = 0, size = column->size(); i < size; ++i)
         {
-            if constexpr (Keyed)
-                if (!key_cols.is_const && i != 0)
-                    key = Impl::getKey(key_cols, i);
             StringRef bytes = column->getDataAt(i);
             const ToType hash = apply(key, bytes.data, bytes.size);
             if constexpr (first)
@@ -1286,11 +1181,8 @@ private:
     }
 
     template <bool first>
-    void executeString(const KeyColumnsType & key_cols, const IColumn * column, typename ColumnVector<ToType>::Container & vec_to) const
+    void executeString(const KeyType & key, const IColumn * column, typename ColumnVector<ToType>::Container & vec_to) const
     {
-        KeyType key{};
-        if constexpr (Keyed)
-            key = Impl::getKey(key_cols, 0);
         if (const ColumnString * col_from = checkAndGetColumn<ColumnString>(column))
         {
             const typename ColumnString::Chars & data = col_from->getChars();
@@ -1300,9 +1192,6 @@ private:
             ColumnString::Offset current_offset = 0;
             for (size_t i = 0; i < size; ++i)
             {
-                if constexpr (Keyed)
-                    if (!key_cols.is_const && i != 0)
-                        key = Impl::getKey(key_cols, i);
                 const ToType hash = apply(key,
                     reinterpret_cast<const char *>(&data[current_offset]),
                     offsets[i] - current_offset - 1);
@@ -1323,9 +1212,6 @@ private:
 
             for (size_t i = 0; i < size; ++i)
             {
-                if constexpr (Keyed)
-                    if (!key_cols.is_const && i != 0)
-                        key = Impl::getKey(key_cols, i);
                 const ToType hash = apply(key, reinterpret_cast<const char *>(&data[i * n]), n);
                 if constexpr (first)
                     vec_to[i] = hash;
@@ -1335,14 +1221,6 @@ private:
         }
         else if (const ColumnConst * col_from_const = checkAndGetColumnConstStringOrFixedString(column))
         {
-            if constexpr (Keyed)
-            {
-                if (!key_cols.is_const)
-                {
-                    ColumnPtr full_column = col_from_const->convertToFullColumn();
-                    return executeString<first>(key_cols, full_column.get(), vec_to);
-                }
-            }
             String value = col_from_const->getValue<String>();
             const ToType hash = apply(key, value.data(), value.size());
             const size_t size = vec_to.size();
@@ -1350,15 +1228,8 @@ private:
             if constexpr (first)
                 vec_to.assign(size, hash);
             else
-            {
                 for (size_t i = 0; i < size; ++i)
-                {
-                    if constexpr (Keyed)
-                        if (!key_cols.is_const && i != 0)
-                            key = Impl::getKey(key_cols, i);
                     vec_to[i] = combineHashes(key, vec_to[i], hash);
-                }
-            }
         }
         else
             throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}",
@@ -1366,7 +1237,7 @@ private:
     }
 
     template <bool first>
-    void executeArray(const KeyColumnsType & key_cols, const IDataType * type, const IColumn * column, typename ColumnVector<ToType>::Container & vec_to) const
+    void executeArray(const KeyType & key, const IDataType * type, const IColumn * column, typename ColumnVector<ToType>::Container & vec_to) const
     {
         const IDataType * nested_type = typeid_cast<const DataTypeArray &>(*type).getNestedType().get();
 
@@ -1378,19 +1249,13 @@ private:
 
             typename ColumnVector<ToType>::Container vec_temp(nested_size);
             bool nested_is_first = true;
-            executeForArgument(key_cols, nested_type, nested_column, vec_temp, nested_is_first);
+            executeForArgument(key, nested_type, nested_column, vec_temp, nested_is_first);
 
             const size_t size = offsets.size();
 
             ColumnArray::Offset current_offset = 0;
-            KeyType key{};
-            if constexpr (Keyed)
-                key = Impl::getKey(key_cols, 0);
             for (size_t i = 0; i < size; ++i)
             {
-                if constexpr (Keyed)
-                    if (!key_cols.is_const && i != 0)
-                        key = Impl::getKey(key_cols, i);
                 ColumnArray::Offset next_offset = offsets[i];
 
                 ToType hash;
@@ -1414,7 +1279,7 @@ private:
         {
             /// NOTE: here, of course, you can do without the materialization of the column.
             ColumnPtr full_column = col_from_const->convertToFullColumn();
-            executeArray<first>(key_cols, type, full_column.get(), vec_to);
+            executeArray<first>(key, type, full_column.get(), vec_to);
         }
         else
             throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}",
@@ -1422,7 +1287,7 @@ private:
     }
 
     template <bool first>
-    void executeAny(const KeyColumnsType & key_cols, const IDataType * from_type, const IColumn * icolumn, typename ColumnVector<ToType>::Container & vec_to) const
+    void executeAny(const KeyType & key, const IDataType * from_type, const IColumn * icolumn, typename ColumnVector<ToType>::Container & vec_to) const
     {
         WhichDataType which(from_type);
 
@@ -1430,45 +1295,40 @@ private:
             throw Exception(ErrorCodes::LOGICAL_ERROR, "Argument column '{}' size {} doesn't match result column size {} of function {}",
                     icolumn->getName(), icolumn->size(), vec_to.size(), getName());
 
-        if constexpr (Keyed)
-            if ((!key_cols.is_const && key_cols.size() != vec_to.size())
-                || (key_cols.is_const && key_cols.size() != 1))
-                throw Exception(ErrorCodes::LOGICAL_ERROR, "Key column size {} doesn't match result column size {} of function {}", key_cols.size(), vec_to.size(), getName());
-
-        if      (which.isUInt8()) executeIntType<UInt8, first>(key_cols, icolumn, vec_to);
-        else if (which.isUInt16()) executeIntType<UInt16, first>(key_cols, icolumn, vec_to);
-        else if (which.isUInt32()) executeIntType<UInt32, first>(key_cols, icolumn, vec_to);
-        else if (which.isUInt64()) executeIntType<UInt64, first>(key_cols, icolumn, vec_to);
-        else if (which.isUInt128()) executeBigIntType<UInt128, first>(key_cols, icolumn, vec_to);
-        else if (which.isUInt256()) executeBigIntType<UInt256, first>(key_cols, icolumn, vec_to);
-        else if (which.isInt8()) executeIntType<Int8, first>(key_cols, icolumn, vec_to);
-        else if (which.isInt16()) executeIntType<Int16, first>(key_cols, icolumn, vec_to);
-        else if (which.isInt32()) executeIntType<Int32, first>(key_cols, icolumn, vec_to);
-        else if (which.isInt64()) executeIntType<Int64, first>(key_cols, icolumn, vec_to);
-        else if (which.isInt128()) executeBigIntType<Int128, first>(key_cols, icolumn, vec_to);
-        else if (which.isInt256()) executeBigIntType<Int256, first>(key_cols, icolumn, vec_to);
-        else if (which.isUUID()) executeBigIntType<UUID, first>(key_cols, icolumn, vec_to);
-        else if (which.isIPv4()) executeIntType<IPv4, first>(key_cols, icolumn, vec_to);
-        else if (which.isIPv6()) executeBigIntType<IPv6, first>(key_cols, icolumn, vec_to);
-        else if (which.isEnum8()) executeIntType<Int8, first>(key_cols, icolumn, vec_to);
-        else if (which.isEnum16()) executeIntType<Int16, first>(key_cols, icolumn, vec_to);
-        else if (which.isDate()) executeIntType<UInt16, first>(key_cols, icolumn, vec_to);
-        else if (which.isDate32()) executeIntType<Int32, first>(key_cols, icolumn, vec_to);
-        else if (which.isDateTime()) executeIntType<UInt32, first>(key_cols, icolumn, vec_to);
+        if      (which.isUInt8()) executeIntType<UInt8, first>(key, icolumn, vec_to);
+        else if (which.isUInt16()) executeIntType<UInt16, first>(key, icolumn, vec_to);
+        else if (which.isUInt32()) executeIntType<UInt32, first>(key, icolumn, vec_to);
+        else if (which.isUInt64()) executeIntType<UInt64, first>(key, icolumn, vec_to);
+        else if (which.isUInt128()) executeBigIntType<UInt128, first>(key, icolumn, vec_to);
+        else if (which.isUInt256()) executeBigIntType<UInt256, first>(key, icolumn, vec_to);
+        else if (which.isInt8()) executeIntType<Int8, first>(key, icolumn, vec_to);
+        else if (which.isInt16()) executeIntType<Int16, first>(key, icolumn, vec_to);
+        else if (which.isInt32()) executeIntType<Int32, first>(key, icolumn, vec_to);
+        else if (which.isInt64()) executeIntType<Int64, first>(key, icolumn, vec_to);
+        else if (which.isInt128()) executeBigIntType<Int128, first>(key, icolumn, vec_to);
+        else if (which.isInt256()) executeBigIntType<Int256, first>(key, icolumn, vec_to);
+        else if (which.isUUID()) executeBigIntType<UUID, first>(key, icolumn, vec_to);
+        else if (which.isIPv4()) executeIntType<IPv4, first>(key, icolumn, vec_to);
+        else if (which.isIPv6()) executeBigIntType<IPv6, first>(key, icolumn, vec_to);
+        else if (which.isEnum8()) executeIntType<Int8, first>(key, icolumn, vec_to);
+        else if (which.isEnum16()) executeIntType<Int16, first>(key, icolumn, vec_to);
+        else if (which.isDate()) executeIntType<UInt16, first>(key, icolumn, vec_to);
+        else if (which.isDate32()) executeIntType<Int32, first>(key, icolumn, vec_to);
+        else if (which.isDateTime()) executeIntType<UInt32, first>(key, icolumn, vec_to);
         /// TODO: executeIntType() for Decimal32/64 leads to incompatible result
-        else if (which.isDecimal32()) executeBigIntType<Decimal32, first>(key_cols, icolumn, vec_to);
-        else if (which.isDecimal64()) executeBigIntType<Decimal64, first>(key_cols, icolumn, vec_to);
-        else if (which.isDecimal128()) executeBigIntType<Decimal128, first>(key_cols, icolumn, vec_to);
-        else if (which.isDecimal256()) executeBigIntType<Decimal256, first>(key_cols, icolumn, vec_to);
-        else if (which.isFloat32()) executeIntType<Float32, first>(key_cols, icolumn, vec_to);
-        else if (which.isFloat64()) executeIntType<Float64, first>(key_cols, icolumn, vec_to);
-        else if (which.isString()) executeString<first>(key_cols, icolumn, vec_to);
-        else if (which.isFixedString()) executeString<first>(key_cols, icolumn, vec_to);
-        else if (which.isArray()) executeArray<first>(key_cols, from_type, icolumn, vec_to);
-        else executeGeneric<first>(key_cols, icolumn, vec_to);
+        else if (which.isDecimal32()) executeBigIntType<Decimal32, first>(key, icolumn, vec_to);
+        else if (which.isDecimal64()) executeBigIntType<Decimal64, first>(key, icolumn, vec_to);
+        else if (which.isDecimal128()) executeBigIntType<Decimal128, first>(key, icolumn, vec_to);
+        else if (which.isDecimal256()) executeBigIntType<Decimal256, first>(key, icolumn, vec_to);
+        else if (which.isFloat32()) executeIntType<Float32, first>(key, icolumn, vec_to);
+        else if (which.isFloat64()) executeIntType<Float64, first>(key, icolumn, vec_to);
+        else if (which.isString()) executeString<first>(key, icolumn, vec_to);
+        else if (which.isFixedString()) executeString<first>(key, icolumn, vec_to);
+        else if (which.isArray()) executeArray<first>(key, from_type, icolumn, vec_to);
+        else executeGeneric<first>(key, icolumn, vec_to);
     }
 
-    void executeForArgument(const KeyColumnsType & key_cols, const IDataType * type, const IColumn * column, typename ColumnVector<ToType>::Container & vec_to, bool & is_first) const
+    void executeForArgument(const KeyType & key, const IDataType * type, const IColumn * column, typename ColumnVector<ToType>::Container & vec_to, bool & is_first) const
     {
         /// Flattening of tuples.
         if (const ColumnTuple * tuple = typeid_cast<const ColumnTuple *>(column))
@@ -1477,7 +1337,7 @@ private:
             const DataTypes & tuple_types = typeid_cast<const DataTypeTuple &>(*type).getElements();
             size_t tuple_size = tuple_columns.size();
             for (size_t i = 0; i < tuple_size; ++i)
-                executeForArgument(key_cols, tuple_types[i].get(), tuple_columns[i].get(), vec_to, is_first);
+                executeForArgument(key, tuple_types[i].get(), tuple_columns[i].get(), vec_to, is_first);
         }
         else if (const ColumnTuple * tuple_const = checkAndGetColumnConstData<ColumnTuple>(column))
         {
@@ -1487,24 +1347,24 @@ private:
             for (size_t i = 0; i < tuple_size; ++i)
             {
                 auto tmp = ColumnConst::create(tuple_columns[i], column->size());
-                executeForArgument(key_cols, tuple_types[i].get(), tmp.get(), vec_to, is_first);
+                executeForArgument(key, tuple_types[i].get(), tmp.get(), vec_to, is_first);
             }
         }
         else if (const auto * map = checkAndGetColumn<ColumnMap>(column))
         {
             const auto & type_map = assert_cast<const DataTypeMap &>(*type);
-            executeForArgument(key_cols, type_map.getNestedType().get(), map->getNestedColumnPtr().get(), vec_to, is_first);
+            executeForArgument(key, type_map.getNestedType().get(), map->getNestedColumnPtr().get(), vec_to, is_first);
         }
         else if (const auto * const_map = checkAndGetColumnConst<ColumnMap>(column))
         {
-            executeForArgument(key_cols, type, const_map->convertToFullColumnIfConst().get(), vec_to, is_first);
+            executeForArgument(key, type, const_map->convertToFullColumnIfConst().get(), vec_to, is_first);
         }
         else
         {
             if (is_first)
-                executeAny<true>(key_cols, type, column, vec_to);
+                executeAny<true>(key, type, column, vec_to);
             else
-                executeAny<false>(key_cols, type, column, vec_to);
+                executeAny<false>(key, type, column, vec_to);
         }
 
         is_first = false;
@@ -1535,33 +1395,30 @@ public:
     {
         auto col_to = ColumnVector<ToType>::create(input_rows_count);
 
-        if (input_rows_count != 0)
+        typename ColumnVector<ToType>::Container & vec_to = col_to->getData();
+
+        /// If using a "keyed" algorithm, the first argument is the key and
+        /// the data starts from the second argument.
+        /// Otherwise there is no key and all arguments are interpreted as data.
+        constexpr size_t first_data_argument = Keyed;
+
+        if (arguments.size() <= first_data_argument)
         {
-            typename ColumnVector<ToType>::Container & vec_to = col_to->getData();
+            /// Return a fixed random-looking magic number when input is empty
+            vec_to.assign(input_rows_count, static_cast<ToType>(0xe28dbde7fe22e41c));
+        }
 
-            /// If using a "keyed" algorithm, the first argument is the key and
-            /// the data starts from the second argument.
-            /// Otherwise there is no key and all arguments are interpreted as data.
-            constexpr size_t first_data_argument = Keyed;
+        KeyType key{};
+        if constexpr (Keyed)
+            if (!arguments.empty())
+                key = Impl::parseKey(arguments[0]);
 
-            if (arguments.size() <= first_data_argument)
-            {
-                /// Return a fixed random-looking magic number when input is empty
-                vec_to.assign(input_rows_count, static_cast<ToType>(0xe28dbde7fe22e41c));
-            }
-
-            KeyColumnsType key_cols{};
-            if constexpr (Keyed)
-                if (!arguments.empty())
-                    key_cols = Impl::parseKeyColumns(arguments[0]);
-
-            /// The function supports arbitrary number of arguments of arbitrary types.
-            bool is_first_argument = true;
-            for (size_t i = first_data_argument; i < arguments.size(); ++i)
-            {
-                const auto & col = arguments[i];
-                executeForArgument(key_cols, col.type.get(), col.column.get(), vec_to, is_first_argument);
-            }
+        /// The function supports arbitrary number of arguments of arbitrary types.
+        bool is_first_argument = true;
+        for (size_t i = first_data_argument; i < arguments.size(); ++i)
+        {
+            const auto & col = arguments[i];
+            executeForArgument(key, col.type.get(), col.column.get(), vec_to, is_first_argument);
         }
 
         if constexpr (std::is_same_v<ToType, UInt128>) /// backward-compatible
@@ -1593,19 +1450,17 @@ public:
 
 ) // DECLARE_MULTITARGET_CODE
 
-template <typename Impl, bool Keyed = false, typename KeyType = char, typename KeyColumnsType = char>
-class FunctionAnyHash : public TargetSpecific::Default::FunctionAnyHash<Impl, Keyed, KeyType, KeyColumnsType>
+template <typename Impl, bool Keyed = false, typename KeyType = char>
+class FunctionAnyHash : public TargetSpecific::Default::FunctionAnyHash<Impl, Keyed, KeyType>
 {
 public:
     explicit FunctionAnyHash(ContextPtr context) : selector(context)
     {
-        selector
-            .registerImplementation<TargetArch::Default, TargetSpecific::Default::FunctionAnyHash<Impl, Keyed, KeyType, KeyColumnsType>>();
+        selector.registerImplementation<TargetArch::Default, TargetSpecific::Default::FunctionAnyHash<Impl, Keyed, KeyType>>();
 
 #if USE_MULTITARGET_CODE
-        selector.registerImplementation<TargetArch::AVX2, TargetSpecific::AVX2::FunctionAnyHash<Impl, Keyed, KeyType, KeyColumnsType>>();
-        selector
-            .registerImplementation<TargetArch::AVX512F, TargetSpecific::AVX512F::FunctionAnyHash<Impl, Keyed, KeyType, KeyColumnsType>>();
+        selector.registerImplementation<TargetArch::AVX2, TargetSpecific::AVX2::FunctionAnyHash<Impl, Keyed, KeyType>>();
+        selector.registerImplementation<TargetArch::AVX512F, TargetSpecific::AVX512F::FunctionAnyHash<Impl, Keyed, KeyType>>();
 #endif
     }
 
@@ -1841,7 +1696,7 @@ struct NameIntHash32 { static constexpr auto name = "intHash32"; };
 struct NameIntHash64 { static constexpr auto name = "intHash64"; };
 
 using FunctionSipHash64 = FunctionAnyHash<SipHash64Impl>;
-using FunctionSipHash64Keyed = FunctionAnyHash<SipHash64KeyedImpl, true, SipHash64KeyedImpl::Key, SipHash64KeyedImpl::KeyColumns>;
+using FunctionSipHash64Keyed = FunctionAnyHash<SipHash64KeyedImpl, true, SipHash64KeyedImpl::Key>;
 using FunctionIntHash32 = FunctionIntHash<IntHash32Impl, NameIntHash32>;
 using FunctionIntHash64 = FunctionIntHash<IntHash64Impl, NameIntHash64>;
 #if USE_SSL
@@ -1855,10 +1710,8 @@ using FunctionSHA384 = FunctionStringHashFixedString<SHA384Impl>;
 using FunctionSHA512 = FunctionStringHashFixedString<SHA512Impl>;
 #endif
 using FunctionSipHash128 = FunctionAnyHash<SipHash128Impl>;
-using FunctionSipHash128Keyed = FunctionAnyHash<SipHash128KeyedImpl, true, SipHash128KeyedImpl::Key, SipHash128KeyedImpl::KeyColumns>;
+using FunctionSipHash128Keyed = FunctionAnyHash<SipHash128KeyedImpl, true, SipHash128KeyedImpl::Key>;
 using FunctionSipHash128Reference = FunctionAnyHash<SipHash128ReferenceImpl>;
-using FunctionSipHash128ReferenceKeyed
-    = FunctionAnyHash<SipHash128ReferenceKeyedImpl, true, SipHash128ReferenceKeyedImpl::Key, SipHash128ReferenceKeyedImpl::KeyColumns>;
 using FunctionCityHash64 = FunctionAnyHash<ImplCityHash64>;
 using FunctionFarmFingerprint64 = FunctionAnyHash<ImplFarmFingerprint64>;
 using FunctionFarmHash64 = FunctionAnyHash<ImplFarmHash64>;
diff --git a/src/Functions/FunctionsHashingMisc.cpp b/src/Functions/FunctionsHashingMisc.cpp
index f56568b2508..56c3c1ed00c 100644
--- a/src/Functions/FunctionsHashingMisc.cpp
+++ b/src/Functions/FunctionsHashingMisc.cpp
@@ -20,11 +20,6 @@ REGISTER_FUNCTION(Hashing)
         .examples{{"hash", "SELECT hex(sipHash128Reference('foo', '\\x01', 3))", ""}},
         .categories{"Hash"}
     });
-    factory.registerFunction<FunctionSipHash128ReferenceKeyed>(FunctionDocumentation{
-        .description = "Same as [sipHash128Reference](#hash_functions-siphash128reference) but additionally takes an explicit key argument "
-                       "instead of using a fixed key.",
-        .examples{{"hash", "SELECT hex(sipHash128ReferenceKeyed((506097522914230528, 1084818905618843912),'foo', '\\x01', 3));", ""}},
-        .categories{"Hash"}});
     factory.registerFunction<FunctionCityHash64>();
     factory.registerFunction<FunctionFarmFingerprint64>();
     factory.registerFunction<FunctionFarmHash64>();
diff --git a/tests/queries/0_stateless/02534_keyed_siphash.reference b/tests/queries/0_stateless/02534_keyed_siphash.reference
index a9f724365a8..ccc514e7ea2 100644
--- a/tests/queries/0_stateless/02534_keyed_siphash.reference
+++ b/tests/queries/0_stateless/02534_keyed_siphash.reference
@@ -197,40 +197,3 @@ E28DBDE7FE22E41C
 Check bug with hashing of const integer values
 11862823756610506724
 11862823756610506724
-86AE90BB6A238D3F6221457630142C9B
-86AE90BB6A238D3F6221457630142C9B
-Check memsan bug
-18096612095653370192
-20AF99D3A87829E0
-12489502208762728797
-Check const columns
-15080046610211022027
-15080046610211022027
-15080046610211022027
-15080046610211022027
-2E779C73D13981AA1AE19AFF9617EA49
-2E779C73D13981AA1AE19AFF9617EA49
-2E779C73D13981AA1AE19AFF9617EA49
-2E779C73D13981AA1AE19AFF9617EA49
-Check multiple keys as tuple from a table
-11862823756610506724
-9357996107237883963
-86AE90BB6A238D3F6221457630142C9B
-F6D93D8FEA6D7DECCDD95A7A0A2AA36D
-Check multiple keys as separate ints from a table
-11862823756610506724
-9357996107237883963
-86AE90BB6A238D3F6221457630142C9B
-F6D93D8FEA6D7DECCDD95A7A0A2AA36D
-Check constant key and data from a table
-11862823756610506724
-11862823756610506724
-86AE90BB6A238D3F6221457630142C9B
-86AE90BB6A238D3F6221457630142C9B
-Check multiple keys as separate ints from a table with constant data
-11862823756610506724
-9357996107237883963
-86AE90BB6A238D3F6221457630142C9B
-F6D93D8FEA6D7DECCDD95A7A0A2AA36D
-Check asan bug
-0
diff --git a/tests/queries/0_stateless/02534_keyed_siphash.sql b/tests/queries/0_stateless/02534_keyed_siphash.sql
index 4f3ae7d62bd..900b99f548a 100644
--- a/tests/queries/0_stateless/02534_keyed_siphash.sql
+++ b/tests/queries/0_stateless/02534_keyed_siphash.sql
@@ -263,10 +263,10 @@ select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8,
 select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62)) == sipHash128(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62));
 select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63)) == sipHash128(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63));
 
-select sipHash64Keyed((0, 0), '1'); -- { serverError NOT_IMPLEMENTED }
-select sipHash128Keyed((0, 0), '1'); -- { serverError NOT_IMPLEMENTED }
-select sipHash64Keyed(toUInt64(0), '1'); -- { serverError NOT_IMPLEMENTED }
-select sipHash128Keyed(toUInt64(0), '1'); -- { serverError NOT_IMPLEMENTED }
+select sipHash64Keyed((0, 0), '1'); -- { serverError 48 }
+select sipHash128Keyed((0, 0), '1'); -- { serverError 48 }
+select sipHash64Keyed(toUInt64(0), '1'); -- { serverError 48 }
+select sipHash128Keyed(toUInt64(0), '1'); -- { serverError 48 }
 
 select hex(sipHash64());
 SELECT hex(sipHash128()) = hex(reverse(unhex('1CE422FEE7BD8DE20000000000000000'))) or hex(sipHash128()) = '1CE422FEE7BD8DE20000000000000000';
@@ -280,57 +280,4 @@ INSERT INTO tab VALUES ((2, 2), 4);
 -- these two statements must produce the same result
 SELECT sipHash64Keyed(key, val) FROM tab;
 SELECT sipHash64Keyed(key, 4::UInt64) FROM tab;
-SELECT hex(sipHash128Keyed(key, val)) FROM tab;
-SELECT hex(sipHash128Keyed(key, 4::UInt64)) FROM tab;
 DROP TABLE tab;
-
-SELECT 'Check memsan bug';
-SELECT sipHash64Keyed((2::UInt64, toUInt64(2)), 4) GROUP BY toUInt64(2);
-SELECT hex(sipHash64Keyed((toUInt64(9223372036854775806), toUInt64(-9223372036854775808)), char(2147483646, -2147483648, 1, 3, 4, 7, 2147483647))) GROUP BY toUInt64(257), (toUInt64(9223372036854775806), toUInt64(2147483646));
-SELECT sipHash64Keyed((toUInt64(9223372036854775806), 9223372036854775808::UInt64), char(2)) GROUP BY toUInt64(9223372036854775806);
-
-SELECT 'Check const columns';
-DROP TABLE IF EXISTS sipHashKeyed_test;
-CREATE TABLE sipHashKeyed_test ENGINE = Memory() AS SELECT 1 a, 'test' b;
-SELECT sipHash64Keyed((toUInt64(0), toUInt64(0)), 1, 'test');
-SELECT sipHash64(tuple(*)) FROM sipHashKeyed_test;
-SELECT sipHash64Keyed((toUInt64(0), toUInt64(0)), tuple(*)) FROM sipHashKeyed_test;
-SELECT sipHash64Keyed((toUInt64(0), toUInt64(0)), a, b) FROM sipHashKeyed_test;
-SELECT hex(sipHash128Keyed((toUInt64(0), toUInt64(0)), 1, 'test'));
-SELECT hex(sipHash128(tuple(*))) FROM sipHashKeyed_test;
-SELECT hex(sipHash128Keyed((toUInt64(0), toUInt64(0)), tuple(*))) FROM sipHashKeyed_test;
-SELECT hex(sipHash128Keyed((toUInt64(0), toUInt64(0)), a, b)) FROM sipHashKeyed_test;
-DROP TABLE sipHashKeyed_test;
-
-SELECT 'Check multiple keys as tuple from a table';
-DROP TABLE IF EXISTS sipHashKeyed_keys;
-CREATE TABLE sipHashKeyed_keys (key Tuple(UInt64, UInt64), val UInt64) ENGINE=Memory;
-INSERT INTO sipHashKeyed_keys VALUES ((2, 2), 4);
-INSERT INTO sipHashKeyed_keys VALUES ((4, 4), 4);
-SELECT sipHash64Keyed(key, val) FROM sipHashKeyed_keys ORDER by key;
-SELECT hex(sipHash128Keyed(key, val)) FROM sipHashKeyed_keys ORDER by key;
-DROP TABLE sipHashKeyed_keys;
-
-SELECT 'Check multiple keys as separate ints from a table';
-DROP TABLE IF EXISTS sipHashKeyed_keys;
-CREATE TABLE sipHashKeyed_keys (key0 UInt64, key1 UInt64, val UInt64) ENGINE=Memory;
-INSERT INTO sipHashKeyed_keys VALUES (2, 2, 4);
-INSERT INTO sipHashKeyed_keys VALUES (4, 4, 4);
-SELECT sipHash64Keyed((key0, key1), val) FROM sipHashKeyed_keys ORDER by key0;
-SELECT hex(sipHash128Keyed((key0, key1), val)) FROM sipHashKeyed_keys ORDER by key0;
-SELECT 'Check constant key and data from a table';
-SELECT sipHash64Keyed((2::UInt64, 2::UInt64), val) FROM sipHashKeyed_keys ORDER by val;
-SELECT hex(sipHash128Keyed((2::UInt64, 2::UInt64), val)) FROM sipHashKeyed_keys ORDER by val;
-DROP TABLE sipHashKeyed_keys;
-
-SELECT 'Check multiple keys as separate ints from a table with constant data';
-DROP TABLE IF EXISTS sipHashKeyed_keys;
-CREATE TABLE sipHashKeyed_keys (key0 UInt64, key1 UInt64) ENGINE=Memory;
-INSERT INTO sipHashKeyed_keys VALUES (2, 2);
-INSERT INTO sipHashKeyed_keys VALUES (4, 4);
-SELECT sipHash64Keyed((key0, key1), 4::UInt64) FROM sipHashKeyed_keys ORDER by key0;
-SELECT hex(sipHash128Keyed((key0, key1), 4::UInt64)) FROM sipHashKeyed_keys ORDER by key0;
-DROP TABLE sipHashKeyed_keys;
-
-SELECT 'Check asan bug';
-SELECT sipHash128((toUInt64(9223372036854775806), 1)) = sipHash128(1) GROUP BY sipHash128(1::UInt8), toUInt64(9223372036854775806);
diff --git a/tests/queries/0_stateless/02552_siphash128_reference.reference b/tests/queries/0_stateless/02552_siphash128_reference.reference
index ece9f6a4615..d00491fd7e5 100644
--- a/tests/queries/0_stateless/02552_siphash128_reference.reference
+++ b/tests/queries/0_stateless/02552_siphash128_reference.reference
@@ -1,152 +1 @@
-A3817F04BA25A8E66DF67214C7550293
-DA87C1D86B99AF44347659119B22FC45
-8177228DA4A45DC7FCA38BDEF60AFFE4
-9C70B60C5267A94E5F33B6B02985ED51
-F88164C12D9C8FAF7D0F6E7C7BCD5579
-1368875980776F8854527A07690E9627
-14EECA338B208613485EA0308FD7A15E
-A1F1EBBED8DBC153C0B84AA61FF08239
-3B62A9BA6258F5610F83E264F31497B4
-264499060AD9BAABC47F8B02BB6D71ED
-00110DC378146956C95447D3F3D0FBBA
-0151C568386B6677A2B4DC6F81E5DC18
-D626B266905EF35882634DF68532C125
-9869E247E9C08B10D029934FC4B952F7
-31FCEFAC66D7DE9C7EC7485FE4494902
-5493E99933B0A8117E08EC0F97CFC3D9
-6EE2A4CA67B054BBFD3315BF85230577
-473D06E8738DB89854C066C47AE47740
-A426E5E423BF4885294DA481FEAEF723
-78017731CF65FAB074D5208952512EB1
-9E25FC833F2290733E9344A5E83839EB
-568E495ABE525A218A2214CD3E071D12
-4A29B54552D16B9A469C10528EFF0AAE
-C9D184DDD5A9F5E0CF8CE29A9ABF691C
-2DB479AE78BD50D8882A8A178A6132AD
-8ECE5F042D5E447B5051B9EACB8D8F6F
-9C0B53B4B3C307E87EAEE08678141F66
-ABF248AF69A6EAE4BFD3EB2F129EEB94
-0664DA1668574B88B935F3027358AEF4
-AA4B9DC4BF337DE90CD4FD3C467C6AB7
-EA5C7F471FAF6BDE2B1AD7D4686D2287
-2939B0183223FAFC1723DE4F52C43D35
-7C3956CA5EEAFC3E363E9D556546EB68
-77C6077146F01C32B6B69D5F4EA9FFCF
-37A6986CB8847EDF0925F0F1309B54DE
-A705F0E69DA9A8F907241A2E923C8CC8
-3DC47D1F29C448461E9E76ED904F6711
-0D62BF01E6FC0E1A0D3C4751C5D3692B
-8C03468BCA7C669EE4FD5E084BBEE7B5
-528A5BB93BAF2C9C4473CCE5D0D22BD9
-DF6A301E95C95DAD97AE0CC8C6913BD8
-801189902C857F39E73591285E70B6DB
-E617346AC9C231BB3650AE34CCCA0C5B
-27D93437EFB721AA401821DCEC5ADF89
-89237D9DED9C5E78D8B1C9B166CC7342
-4A6D8091BF5E7D651189FA94A250B14C
-0E33F96055E7AE893FFC0E3DCF492902
-E61C432B720B19D18EC8D84BDC63151B
-F7E5AEF549F782CF379055A608269B16
-438D030FD0B7A54FA837F2AD201A6403
-A590D3EE4FBF04E3247E0D27F286423F
-5FE2C1A172FE93C4B15CD37CAEF9F538
-2C97325CBD06B36EB2133DD08B3A017C
-92C814227A6BCA949FF0659F002AD39E
-DCE850110BD8328CFBD50841D6911D87
-67F14984C7DA791248E32BB5922583DA
-1938F2CF72D54EE97E94166FA91D2A36
-74481E9646ED49FE0F6224301604698E
-57FCA5DE98A9D6D8006438D0583D8A1D
-9FECDE1CEFDC1CBED4763674D9575359
-E3040C00EB28F15366CA73CBD872E740
-7697009A6A831DFECCA91C5993670F7A
-5853542321F567A005D547A4F04759BD
-5150D1772F50834A503E069A973FBD7C
 1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-Check bug with hashing of const integer values
-E940B12600C844966162FF8FE7A16AAE
-E940B12600C844966162FF8FE7A16AAE
-Check memsan bug
-1CE422FEE7BD8DE20000000000000000
-Check const columns
-B66B53476BDBEB8549A257E3B1766C30
-B66B53476BDBEB8549A257E3B1766C30
-B66B53476BDBEB8549A257E3B1766C30
-B66B53476BDBEB8549A257E3B1766C30
-Check multiple keys as tuple from a table
-E940B12600C844966162FF8FE7A16AAE
-EC58946A98A0D37F4E3FAC02FBBA9480
-Check multiple keys as separate ints from a table
-E940B12600C844966162FF8FE7A16AAE
-EC58946A98A0D37F4E3FAC02FBBA9480
-Check constant key and data from a table
-E940B12600C844966162FF8FE7A16AAE
-E940B12600C844966162FF8FE7A16AAE
-Check multiple keys as separate ints from a table with constant data
-E940B12600C844966162FF8FE7A16AAE
-EC58946A98A0D37F4E3FAC02FBBA9480
diff --git a/tests/queries/0_stateless/02552_siphash128_reference.sql b/tests/queries/0_stateless/02552_siphash128_reference.sql
index f7324ed0ee4..200954c3b57 100644
--- a/tests/queries/0_stateless/02552_siphash128_reference.sql
+++ b/tests/queries/0_stateless/02552_siphash128_reference.sql
@@ -1,254 +1 @@
--- Test Vectors from the SipHash reference C implementation:
--- Written by
--- Jean-Philippe Aumasson <jeanphilippe.aumasson@gmail.com>
--- Daniel J. Bernstein <djb@cr.yp.to>
--- Released under CC0
--- https://github.com/veorq/SipHash/blob/eee7d0d84dc7731df2359b243aa5e75d85f6eaef/vectors.h#L645
-
-select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)),
-                           ''));
-select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)),
-                           char(0)));
-select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)),
-                           char(0, 1)));
-select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)),
-                           char(0, 1, 2)));
-select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)),
-                           char(0, 1, 2, 3)));
-select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)),
-                           char(0, 1, 2, 3, 4)));
-select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)),
-                           char(0, 1, 2, 3, 4, 5)));
-select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)),
-                           char(0, 1, 2, 3, 4, 5, 6)));
-select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)),
-                           char(0, 1, 2, 3, 4, 5, 6, 7)));
-select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)),
-                           char(0, 1, 2, 3, 4, 5, 6, 7, 8)));
-select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)),
-                           char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9)));
-select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)),
-                           char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10)));
-select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)),
-                           char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11)));
-select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)),
-                           char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12)));
-select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)),
-                           char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13)));
-select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)),
-                           char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14)));
-select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)),
-                           char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15)));
-select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)),
-                           char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)));
-select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)),
-                           char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17)));
-select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)),
-                           char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18)));
-select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)),
-                           char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19)));
-select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)),
-                           char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20)));
-select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)),
-                           char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21)));
-select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)),
-                           char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22)));
-select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)),
-                           char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23)));
-select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)),
-                           char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24)));
-select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)),
-                           char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25)));
-select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)),
-                           char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26)));
-select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)),
-                           char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27)));
-select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)),
-                           char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28)));
-select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)),
-                           char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29)));
-select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)),
-                           char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30)));
-select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)),
-                           char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31)));
-select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)),
-                           char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32)));
-select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)),
-                           char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33)));
-select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)),
-                           char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34)));
-select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)),
-                           char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35)));
-select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)),
-                           char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36)));
-select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)),
-                           char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37)));
-select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)),
-                           char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38)));
-select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)),
-                           char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39)));
-select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)),
-                           char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40)));
-select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)),
-                           char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41)));
-select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)),
-                           char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42)));
-select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)),
-                           char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43)));
-select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)),
-                           char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44)));
-select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)),
-                           char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45)));
-select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)),
-                           char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46)));
-select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)),
-                           char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47)));
-select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)),
-                           char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48)));
-select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)),
-                           char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49)));
-select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)),
-                           char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50)));
-select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)),
-                           char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51)));
-select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)),
-                           char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52)));
-select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)),
-                           char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53)));
-select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)),
-                           char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54)));
-select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)),
-                           char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55)));
-select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)),
-                           char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56)));
-select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)),
-                           char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57)));
-select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)),
-                           char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58)));
-select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)),
-                           char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59)));
-select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)),
-                           char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60)));
-select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)),
-                           char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61)));
-select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)),
-                           char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62)));
-
--- CH tests
-select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0)) == sipHash128Reference(char(0));
-select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1)) == sipHash128Reference(char(0, 1));
-select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2)) == sipHash128Reference(char(0, 1, 2));
-select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3)) == sipHash128Reference(char(0, 1, 2, 3));
-select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4)) == sipHash128Reference(char(0, 1, 2, 3, 4));
-select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5));
-select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6));
-select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7));
-select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8));
-select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9));
-select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10));
-select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11));
-select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12));
-select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13));
-select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14));
-select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15));
-select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16));
-select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17));
-select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18));
-select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19));
-select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20));
-select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21));
-select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22));
-select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23));
-select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24));
-select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25));
-select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26));
-select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27));
-select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28));
-select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29));
-select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30));
-select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31));
-select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32));
-select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33));
-select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34));
-select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35));
-select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36));
-select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37));
-select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38));
-select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39));
-select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40));
-select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41));
-select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42));
-select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43));
-select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44));
-select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45));
-select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46));
-select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47));
-select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48));
-select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49));
-select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50));
-select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51));
-select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52));
-select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53));
-select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54));
-select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55));
-select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56));
-select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57));
-select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58));
-select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59));
-select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60));
-select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61));
-select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62));
-select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63));
-
-select sipHash128ReferenceKeyed((0, 0), '1'); -- { serverError NOT_IMPLEMENTED }
-select sipHash128ReferenceKeyed(toUInt64(0), '1'); -- { serverError NOT_IMPLEMENTED }
-
 SELECT hex(sipHash128Reference()) = hex(reverse(unhex('1CE422FEE7BD8DE20000000000000000'))) or hex(sipHash128()) = '1CE422FEE7BD8DE20000000000000000';
-SELECT hex(sipHash128ReferenceKeyed()) = hex(reverse(unhex('1CE422FEE7BD8DE20000000000000000'))) or hex(sipHash128Keyed()) = '1CE422FEE7BD8DE20000000000000000';
-
-SELECT 'Check bug with hashing of const integer values';
-DROP TABLE IF EXISTS tab;
-CREATE TABLE tab (key Tuple(UInt64, UInt64), val UInt64) ENGINE=Memory;
-INSERT INTO tab VALUES ((2, 2), 4);
--- these two statements must produce the same result
-SELECT hex(sipHash128ReferenceKeyed(key, val)) FROM tab;
-SELECT hex(sipHash128ReferenceKeyed(key, 4::UInt64)) FROM tab;
-DROP TABLE tab;
-
-SELECT 'Check memsan bug';
-SELECT hex(sipHash128ReferenceKeyed((toUInt64(2), toUInt64(-9223372036854775807)))) GROUP BY (toUInt64(506097522914230528), toUInt64(now64(2, NULL + NULL), 1084818905618843912)), toUInt64(2), NULL + NULL, char(-2147483649, 1);
-
-SELECT 'Check const columns';
-DROP TABLE IF EXISTS sipHashKeyed_test;
-CREATE TABLE sipHashKeyed_test ENGINE = Memory() AS SELECT 1 a, 'test' b;
-SELECT hex(sipHash128ReferenceKeyed((toUInt64(0), toUInt64(0)), 1, 'test'));
-SELECT hex(sipHash128Reference(tuple(*))) FROM sipHashKeyed_test;
-SELECT hex(sipHash128ReferenceKeyed((toUInt64(0), toUInt64(0)), tuple(*))) FROM sipHashKeyed_test;
-SELECT hex(sipHash128ReferenceKeyed((toUInt64(0), toUInt64(0)), a, b)) FROM sipHashKeyed_test;
-DROP TABLE sipHashKeyed_test;
-
-SELECT 'Check multiple keys as tuple from a table';
-DROP TABLE IF EXISTS sipHashKeyed_keys;
-CREATE TABLE sipHashKeyed_keys (key Tuple(UInt64, UInt64), val UInt64) ENGINE=Memory;
-INSERT INTO sipHashKeyed_keys VALUES ((2, 2), 4);
-INSERT INTO sipHashKeyed_keys VALUES ((4, 4), 4);
-SELECT hex(sipHash128ReferenceKeyed(key, val)) FROM sipHashKeyed_keys ORDER by key;
-DROP TABLE sipHashKeyed_keys;
-
-SELECT 'Check multiple keys as separate ints from a table';
-DROP TABLE IF EXISTS sipHashKeyed_keys;
-CREATE TABLE sipHashKeyed_keys (key0 UInt64, key1 UInt64, val UInt64) ENGINE=Memory;
-INSERT INTO sipHashKeyed_keys VALUES (2, 2, 4);
-INSERT INTO sipHashKeyed_keys VALUES (4, 4, 4);
-SELECT hex(sipHash128ReferenceKeyed((key0, key1), val)) FROM sipHashKeyed_keys ORDER by key0;
-SELECT 'Check constant key and data from a table';
-SELECT hex(sipHash128ReferenceKeyed((2::UInt64, 2::UInt64), val)) FROM sipHashKeyed_keys ORDER by val;
-DROP TABLE sipHashKeyed_keys;
-
-SELECT 'Check multiple keys as separate ints from a table with constant data';
-DROP TABLE IF EXISTS sipHashKeyed_keys;
-CREATE TABLE sipHashKeyed_keys (key0 UInt64, key1 UInt64) ENGINE=Memory;
-INSERT INTO sipHashKeyed_keys VALUES (2, 2);
-INSERT INTO sipHashKeyed_keys VALUES (4, 4);
-SELECT hex(sipHash128ReferenceKeyed((key0, key1), 4::UInt64)) FROM sipHashKeyed_keys ORDER by key0;
-DROP TABLE sipHashKeyed_keys;

From 7cc3372355d06dfc1184b3ebcd6d2164d179b7be Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@clickhouse.com>
Date: Sat, 22 Jul 2023 04:30:09 +0200
Subject: [PATCH 171/242] Fix terrible trash

---
 src/Functions/FunctionsHashing.h | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/src/Functions/FunctionsHashing.h b/src/Functions/FunctionsHashing.h
index 82944630b10..090d38fa73d 100644
--- a/src/Functions/FunctionsHashing.h
+++ b/src/Functions/FunctionsHashing.h
@@ -1567,7 +1567,10 @@ public:
         if constexpr (std::is_same_v<ToType, UInt128>) /// backward-compatible
         {
             auto col_to_fixed_string = ColumnFixedString::create(sizeof(UInt128));
-            col_to_fixed_string->getChars() = std::move(*reinterpret_cast<ColumnFixedString::Chars *>(&col_to->getData()));
+            const auto & data = col_to->getData();
+            auto & chars = col_to_fixed_string->getChars();
+            chars.resize(data.size() * sizeof(UInt128));
+            memcpy(chars.data(), data.data(), data.size() * sizeof(UInt128));
             return col_to_fixed_string;
         }
 
@@ -1601,12 +1604,12 @@ public:
     {
         selector
             .registerImplementation<TargetArch::Default, TargetSpecific::Default::FunctionAnyHash<Impl, Keyed, KeyType, KeyColumnsType>>();
-
+/*
 #if USE_MULTITARGET_CODE
         selector.registerImplementation<TargetArch::AVX2, TargetSpecific::AVX2::FunctionAnyHash<Impl, Keyed, KeyType, KeyColumnsType>>();
         selector
             .registerImplementation<TargetArch::AVX512F, TargetSpecific::AVX512F::FunctionAnyHash<Impl, Keyed, KeyType, KeyColumnsType>>();
-#endif
+#endif*/
     }
 
     ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override

From 4c2dabddb6d697ba3744e48e07e09aeaf8fc59d6 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@clickhouse.com>
Date: Sat, 22 Jul 2023 04:31:38 +0200
Subject: [PATCH 172/242] Add a test

---
 tests/queries/0_stateless/02831_trash.reference | 2 ++
 tests/queries/0_stateless/02831_trash.sql       | 2 ++
 2 files changed, 4 insertions(+)
 create mode 100644 tests/queries/0_stateless/02831_trash.reference
 create mode 100644 tests/queries/0_stateless/02831_trash.sql

diff --git a/tests/queries/0_stateless/02831_trash.reference b/tests/queries/0_stateless/02831_trash.reference
new file mode 100644
index 00000000000..e25f2e9e23f
--- /dev/null
+++ b/tests/queries/0_stateless/02831_trash.reference
@@ -0,0 +1,2 @@
+2761631236
+1210084689
diff --git a/tests/queries/0_stateless/02831_trash.sql b/tests/queries/0_stateless/02831_trash.sql
new file mode 100644
index 00000000000..600e2ad0695
--- /dev/null
+++ b/tests/queries/0_stateless/02831_trash.sql
@@ -0,0 +1,2 @@
+SELECT CRC32IEEE(sipHash128());
+SELECT CRC32(murmurHash3_128());

From d2b178536e1e5b6d85c917d3d26bbe2cff7594ea Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@clickhouse.com>
Date: Sat, 22 Jul 2023 04:38:16 +0200
Subject: [PATCH 173/242] Fix terrible trash

---
 src/Functions/FunctionsHashing.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/Functions/FunctionsHashing.h b/src/Functions/FunctionsHashing.h
index 090d38fa73d..8f8715ec3f1 100644
--- a/src/Functions/FunctionsHashing.h
+++ b/src/Functions/FunctionsHashing.h
@@ -1604,12 +1604,12 @@ public:
     {
         selector
             .registerImplementation<TargetArch::Default, TargetSpecific::Default::FunctionAnyHash<Impl, Keyed, KeyType, KeyColumnsType>>();
-/*
+
 #if USE_MULTITARGET_CODE
         selector.registerImplementation<TargetArch::AVX2, TargetSpecific::AVX2::FunctionAnyHash<Impl, Keyed, KeyType, KeyColumnsType>>();
         selector
             .registerImplementation<TargetArch::AVX512F, TargetSpecific::AVX512F::FunctionAnyHash<Impl, Keyed, KeyType, KeyColumnsType>>();
-#endif*/
+#endif
     }
 
     ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override

From 4d0b75ebdd1bb69e155b237768c7db7a22cb09cc Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@clickhouse.com>
Date: Sat, 22 Jul 2023 04:42:16 +0200
Subject: [PATCH 174/242] Remove hashid

---
 .gitmodules                                   |   3 -
 contrib/CMakeLists.txt                        |   1 -
 contrib/hashidsxx                             |   1 -
 contrib/hashidsxx-cmake/CMakeLists.txt        |  14 --
 src/Core/Settings.h                           |   2 +-
 src/Functions/CMakeLists.txt                  |   1 -
 src/Functions/FunctionHashID.cpp              |  12 --
 src/Functions/FunctionHashID.h                | 170 ------------------
 .../0_stateless/02293_hashid.reference        |  15 --
 tests/queries/0_stateless/02293_hashid.sql    |  16 --
 ...new_functions_must_be_documented.reference |   1 -
 11 files changed, 1 insertion(+), 235 deletions(-)
 delete mode 160000 contrib/hashidsxx
 delete mode 100644 contrib/hashidsxx-cmake/CMakeLists.txt
 delete mode 100644 src/Functions/FunctionHashID.cpp
 delete mode 100644 src/Functions/FunctionHashID.h
 delete mode 100644 tests/queries/0_stateless/02293_hashid.reference
 delete mode 100644 tests/queries/0_stateless/02293_hashid.sql

diff --git a/.gitmodules b/.gitmodules
index 151dc28c55b..ba71a8ae3a7 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -258,9 +258,6 @@
 [submodule "contrib/wyhash"]
 	path = contrib/wyhash
 	url = https://github.com/wangyi-fudan/wyhash
-[submodule "contrib/hashidsxx"]
-	path = contrib/hashidsxx
-	url = https://github.com/schoentoon/hashidsxx
 [submodule "contrib/nats-io"]
 	path = contrib/nats-io
 	url = https://github.com/ClickHouse/nats.c
diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt
index 2af468970f1..0f68c0cbc7c 100644
--- a/contrib/CMakeLists.txt
+++ b/contrib/CMakeLists.txt
@@ -164,7 +164,6 @@ add_contrib (libpq-cmake libpq)
 add_contrib (nuraft-cmake NuRaft)
 add_contrib (fast_float-cmake fast_float)
 add_contrib (datasketches-cpp-cmake datasketches-cpp)
-add_contrib (hashidsxx-cmake hashidsxx)
 
 option(ENABLE_NLP "Enable NLP functions support" ${ENABLE_LIBRARIES})
 if (ENABLE_NLP)
diff --git a/contrib/hashidsxx b/contrib/hashidsxx
deleted file mode 160000
index 783f6911ccf..00000000000
--- a/contrib/hashidsxx
+++ /dev/null
@@ -1 +0,0 @@
-Subproject commit 783f6911ccfdaca83e3cfac084c4aad888a80cee
diff --git a/contrib/hashidsxx-cmake/CMakeLists.txt b/contrib/hashidsxx-cmake/CMakeLists.txt
deleted file mode 100644
index 17f3888bd94..00000000000
--- a/contrib/hashidsxx-cmake/CMakeLists.txt
+++ /dev/null
@@ -1,14 +0,0 @@
-set (LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/hashidsxx")
-
-set (SRCS
-    "${LIBRARY_DIR}/hashids.cpp"
-)
-
-set (HDRS
-    "${LIBRARY_DIR}/hashids.h"
-)
-
-add_library(_hashidsxx ${SRCS} ${HDRS})
-target_include_directories(_hashidsxx SYSTEM PUBLIC "${LIBRARY_DIR}")
-
-add_library(ch_contrib::hashidsxx ALIAS _hashidsxx)
diff --git a/src/Core/Settings.h b/src/Core/Settings.h
index cfcb56729d2..bde51ae9971 100644
--- a/src/Core/Settings.h
+++ b/src/Core/Settings.h
@@ -761,7 +761,7 @@ class IColumn;
     /** Experimental functions */ \
     M(Bool, allow_experimental_funnel_functions, false, "Enable experimental functions for funnel analysis.", 0) \
     M(Bool, allow_experimental_nlp_functions, false, "Enable experimental functions for natural language processing.", 0) \
-    M(Bool, allow_experimental_hash_functions, false, "Enable experimental hash functions (hashid, etc)", 0) \
+    M(Bool, allow_experimental_hash_functions, false, "Enable experimental hash functions", 0) \
     M(Bool, allow_experimental_object_type, false, "Allow Object and JSON data types", 0) \
     M(Bool, allow_experimental_annoy_index, false, "Allows to use Annoy index. Disabled by default because this feature is experimental", 0) \
     M(UInt64, max_limit_for_ann_queries, 1'000'000, "SELECT queries with LIMIT bigger than this setting cannot use ANN indexes. Helps to prevent memory overflows in ANN search indexes.", 0) \
diff --git a/src/Functions/CMakeLists.txt b/src/Functions/CMakeLists.txt
index 2f5c8a212f2..06436488050 100644
--- a/src/Functions/CMakeLists.txt
+++ b/src/Functions/CMakeLists.txt
@@ -21,7 +21,6 @@ list (APPEND PUBLIC_LIBS
         dbms
         ch_contrib::metrohash
         ch_contrib::murmurhash
-        ch_contrib::hashidsxx
         ch_contrib::morton_nd
 )
 
diff --git a/src/Functions/FunctionHashID.cpp b/src/Functions/FunctionHashID.cpp
deleted file mode 100644
index 829b3d9d2f6..00000000000
--- a/src/Functions/FunctionHashID.cpp
+++ /dev/null
@@ -1,12 +0,0 @@
-#include "FunctionHashID.h"
-#include <Functions/FunctionFactory.h>
-
-namespace DB
-{
-
-REGISTER_FUNCTION(HashID)
-{
-    factory.registerFunction<FunctionHashID>();
-}
-
-}
diff --git a/src/Functions/FunctionHashID.h b/src/Functions/FunctionHashID.h
deleted file mode 100644
index 680c3f6430b..00000000000
--- a/src/Functions/FunctionHashID.h
+++ /dev/null
@@ -1,170 +0,0 @@
-#pragma once
-
-#include "config.h"
-
-#include <hashids.h>
-
-#include <Columns/ColumnString.h>
-#include <Columns/ColumnsNumber.h>
-#include <DataTypes/DataTypeString.h>
-#include <Functions/FunctionFactory.h>
-#include <Functions/FunctionHelpers.h>
-#include <Functions/IFunction.h>
-#include <Interpreters/Context.h>
-
-#include <functional>
-#include <initializer_list>
-
-namespace DB
-{
-
-namespace ErrorCodes
-{
-    extern const int BAD_ARGUMENTS;
-    extern const int ILLEGAL_COLUMN;
-    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
-    extern const int SUPPORT_IS_DISABLED;
-    extern const int TOO_MANY_ARGUMENTS_FOR_FUNCTION;
-    extern const int TOO_FEW_ARGUMENTS_FOR_FUNCTION;
-}
-
-// hashid(string, salt)
-class FunctionHashID : public IFunction
-{
-public:
-    static constexpr auto name = "hashid";
-
-    static FunctionPtr create(ContextPtr context)
-    {
-        if (!context->getSettingsRef().allow_experimental_hash_functions)
-            throw Exception(ErrorCodes::SUPPORT_IS_DISABLED,
-                "Hashing function '{}' is experimental. Set `allow_experimental_hash_functions` setting to enable it", name);
-
-        return std::make_shared<FunctionHashID>();
-    }
-
-    String getName() const override { return name; }
-
-    size_t getNumberOfArguments() const override { return 0; }
-
-    bool isVariadic() const override { return true; }
-
-    bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; }
-
-    bool useDefaultImplementationForConstants() const override { return true; }
-    ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1, 2, 3}; }
-
-    DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
-    {
-        if (arguments.empty())
-            throw Exception(ErrorCodes::TOO_FEW_ARGUMENTS_FOR_FUNCTION, "Function {} expects at least one argument", getName());
-
-        const auto & id_col = arguments[0];
-        if (!isUnsignedInteger(id_col.type))
-            throw Exception(
-                ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
-                "First argument of function {} must be unsigned integer, got {}",
-                getName(),
-                arguments[0].type->getName());
-
-        if (arguments.size() > 1)
-        {
-            const auto & hash_col = arguments[1];
-            if (!isString(hash_col.type))
-                throw Exception(
-                    ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
-                    "Second argument of function {} must be String, got {}",
-                    getName(),
-                    arguments[1].type->getName());
-        }
-
-        if (arguments.size() > 2)
-        {
-            const auto & min_length_col = arguments[2];
-            if (!isUInt8(min_length_col.type))
-                throw Exception(
-                    ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
-                    "Third argument of function {} must be UInt8, got {}",
-                    getName(),
-                    arguments[2].type->getName());
-        }
-
-        if (arguments.size() > 3)
-        {
-            const auto & alphabet_col = arguments[3];
-            if (!isString(alphabet_col.type))
-                throw Exception(
-                    ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
-                    "Fourth argument of function {} must be String, got {}",
-                    getName(),
-                    arguments[3].type->getName());
-        }
-
-        if (arguments.size() > 4)
-        {
-            throw Exception(
-                ErrorCodes::TOO_MANY_ARGUMENTS_FOR_FUNCTION,
-                "Function {} expect no more than four arguments (integer, salt, min_length, optional_alphabet), got {}",
-                getName(),
-                arguments.size());
-        }
-
-        return std::make_shared<DataTypeString>();
-    }
-
-    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
-    {
-        const auto & numcolumn = arguments[0].column;
-
-        if (checkAndGetColumn<ColumnUInt8>(numcolumn.get()) || checkAndGetColumn<ColumnUInt16>(numcolumn.get())
-            || checkAndGetColumn<ColumnUInt32>(numcolumn.get()) || checkAndGetColumn<ColumnUInt64>(numcolumn.get()))
-        {
-            std::string salt;
-            UInt8 min_length = 0;
-            std::string alphabet;
-
-            if (arguments.size() >= 4)
-            {
-                const auto & alphabetcolumn = arguments[3].column;
-                if (const auto * alpha_col = checkAndGetColumnConst<ColumnString>(alphabetcolumn.get()))
-                {
-                    alphabet = alpha_col->getValue<String>();
-                    if (alphabet.find('\0') != std::string::npos)
-                        throw Exception(ErrorCodes::BAD_ARGUMENTS, "Custom alphabet must not contain null character");
-                }
-            }
-            else
-                alphabet.assign(DEFAULT_ALPHABET);
-
-            if (arguments.size() >= 3)
-            {
-                const auto & minlengthcolumn = arguments[2].column;
-                if (const auto * min_length_col = checkAndGetColumnConst<ColumnUInt8>(minlengthcolumn.get()))
-                    min_length = min_length_col->getValue<UInt8>();
-            }
-
-            if (arguments.size() >= 2)
-            {
-                const auto & saltcolumn = arguments[1].column;
-                if (const auto * salt_col = checkAndGetColumnConst<ColumnString>(saltcolumn.get()))
-                    salt = salt_col->getValue<String>();
-            }
-
-            hashidsxx::Hashids hash(salt, min_length, alphabet);
-
-            auto col_res = ColumnString::create();
-
-            for (size_t i = 0; i < input_rows_count; ++i)
-            {
-                col_res->insert(hash.encode({numcolumn->getUInt(i)}));
-            }
-
-            return col_res;
-        }
-        else
-            throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function hashid",
-                arguments[0].column->getName());
-    }
-};
-
-}
diff --git a/tests/queries/0_stateless/02293_hashid.reference b/tests/queries/0_stateless/02293_hashid.reference
deleted file mode 100644
index dfc78349c05..00000000000
--- a/tests/queries/0_stateless/02293_hashid.reference
+++ /dev/null
@@ -1,15 +0,0 @@
-0	gY
-1	jR
-2	k5
-3	l5
-4	mO
-0	pbgkmdljlpjoapne
-1	akemglnjepjpodba
-2	obmgndljgajpkeao
-3	dldokmpjpgjgeanb
-4	nkdlpgajngjnobme
-YQrvD5XGvbx
-Bm3zaOq7zbp
-oV
-oV
-6b
diff --git a/tests/queries/0_stateless/02293_hashid.sql b/tests/queries/0_stateless/02293_hashid.sql
deleted file mode 100644
index 06af0b5e1d8..00000000000
--- a/tests/queries/0_stateless/02293_hashid.sql
+++ /dev/null
@@ -1,16 +0,0 @@
--- Tags: no-upgrade-check
-SET allow_experimental_hash_functions = 1;
-
-select number, hashid(number) from system.numbers limit 5;
-select number, hashid(number, 's3cr3t', 16, 'abcdefghijklmnop') from system.numbers limit 5;
-select hashid(1234567890123456, 's3cr3t');
-select hashid(1234567890123456, 's3cr3t2');
-
-SELECT  hashid(1, hashid(2));
-SELECT  hashid(1, 'k5');
-SELECT  hashid(1, 'k5_othersalt');
-
--- https://github.com/ClickHouse/ClickHouse/issues/39672
-SELECT
-    JSONExtractRaw(257, NULL),
-    hashid(1024, if(rand() % 10, 'truetruetruetrue', NULL), 's3\0r3t'); -- {serverError 43}
diff --git a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference
index fc00bfdadca..595ebb483d5 100644
--- a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference
+++ b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference
@@ -346,7 +346,6 @@ hasAny
 hasColumnInTable
 hasSubstr
 hasThreadFuzzer
-hashid
 hex
 hiveHash
 hop

From 5f4756fb33f754913f4ab8ddfa84c39739920f19 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@clickhouse.com>
Date: Sat, 22 Jul 2023 04:54:58 +0200
Subject: [PATCH 175/242] Remove toDecimalString

---
 .../functions/type-conversion-functions.md    |  38 ---
 .../functions/type-conversion-functions.md    |  38 ---
 src/Functions/FunctionToDecimalString.cpp     |  22 --
 src/Functions/FunctionToDecimalString.h       | 312 ------------------
 src/IO/WriteHelpers.h                         |  44 +--
 .../02676_to_decimal_string.reference         |  21 --
 .../0_stateless/02676_to_decimal_string.sql   |  35 --
 7 files changed, 13 insertions(+), 497 deletions(-)
 delete mode 100644 src/Functions/FunctionToDecimalString.cpp
 delete mode 100644 src/Functions/FunctionToDecimalString.h
 delete mode 100644 tests/queries/0_stateless/02676_to_decimal_string.reference
 delete mode 100644 tests/queries/0_stateless/02676_to_decimal_string.sql

diff --git a/docs/en/sql-reference/functions/type-conversion-functions.md b/docs/en/sql-reference/functions/type-conversion-functions.md
index 36f40b37238..c2bd525c483 100644
--- a/docs/en/sql-reference/functions/type-conversion-functions.md
+++ b/docs/en/sql-reference/functions/type-conversion-functions.md
@@ -945,44 +945,6 @@ Result:
 └────────────┴───────┘
 ```
 
-## toDecimalString
-
-Converts a numeric value to String with the number of fractional digits in the output specified by the user.
-
-**Syntax**
-
-``` sql
-toDecimalString(number, scale)
-```
-
-**Parameters**
-
-- `number` — Value to be represented as String, [Int, UInt](/docs/en/sql-reference/data-types/int-uint.md), [Float](/docs/en/sql-reference/data-types/float.md), [Decimal](/docs/en/sql-reference/data-types/decimal.md),
-- `scale` — Number of fractional digits, [UInt8](/docs/en/sql-reference/data-types/int-uint.md).
-    * Maximum scale for [Decimal](/docs/en/sql-reference/data-types/decimal.md) and [Int, UInt](/docs/en/sql-reference/data-types/int-uint.md) types is 77 (it is the maximum possible number of significant digits for Decimal),
-    * Maximum scale for [Float](/docs/en/sql-reference/data-types/float.md) is 60.
-
-**Returned value**
-
-- Input value represented as [String](/docs/en/sql-reference/data-types/string.md) with given number of fractional digits (scale).
-    The number is rounded up or down according to common arithmetic in case requested scale is smaller than original number's scale.
-
-**Example**
-
-Query:
-
-``` sql
-SELECT toDecimalString(CAST('64.32', 'Float64'), 5);
-```
-
-Result:
-
-```response
-┌toDecimalString(CAST('64.32', 'Float64'), 5)─┐
-│ 64.32000                                    │
-└─────────────────────────────────────────────┘
-```
-
 ## reinterpretAsUInt(8\|16\|32\|64)
 
 ## reinterpretAsInt(8\|16\|32\|64)
diff --git a/docs/ru/sql-reference/functions/type-conversion-functions.md b/docs/ru/sql-reference/functions/type-conversion-functions.md
index e53104d8d71..088b1a9a1f1 100644
--- a/docs/ru/sql-reference/functions/type-conversion-functions.md
+++ b/docs/ru/sql-reference/functions/type-conversion-functions.md
@@ -762,44 +762,6 @@ SELECT toFixedString('foo\0bar', 8) AS s, toStringCutToZero(s) AS s_cut;
 └────────────┴───────┘
 ```
 
-## toDecimalString
-
-Принимает любой численный тип первым аргументом, возвращает строковое десятичное представление числа с точностью, заданной вторым аргументом.
-
-**Синтаксис**
-
-``` sql
-toDecimalString(number, scale)
-```
-
-**Параметры**
-
--   `number` — Значение любого числового типа: [Int, UInt](/docs/ru/sql-reference/data-types/int-uint.md), [Float](/docs/ru/sql-reference/data-types/float.md), [Decimal](/docs/ru/sql-reference/data-types/decimal.md),
--   `scale` — Требуемое количество десятичных знаков после запятой, [UInt8](/docs/ru/sql-reference/data-types/int-uint.md).
-    * Значение `scale` для типов [Decimal](/docs/ru/sql-reference/data-types/decimal.md) и [Int, UInt](/docs/ru/sql-reference/data-types/int-uint.md) должно не превышать 77 (так как это наибольшее количество значимых символов для этих типов),
-    * Значение `scale` для типа [Float](/docs/ru/sql-reference/data-types/float.md) не должно превышать 60.
-
-**Возвращаемое значение**
-
--   Строка ([String](/docs/en/sql-reference/data-types/string.md)), представляющая собой десятичное представление входного числа с заданной длиной дробной части.
-    При необходимости число округляется по стандартным правилам арифметики.
-
-**Пример использования**
-
-Запрос:
-
-``` sql
-SELECT toDecimalString(CAST('64.32', 'Float64'), 5);
-```
-
-Результат:
-
-```response
-┌─toDecimalString(CAST('64.32', 'Float64'), 5)┐
-│ 64.32000                                    │
-└─────────────────────────────────────────────┘
-```
-
 ## reinterpretAsUInt(8\|16\|32\|64) {#reinterpretasuint8163264}
 
 ## reinterpretAsInt(8\|16\|32\|64) {#reinterpretasint8163264}
diff --git a/src/Functions/FunctionToDecimalString.cpp b/src/Functions/FunctionToDecimalString.cpp
deleted file mode 100644
index fe417b19137..00000000000
--- a/src/Functions/FunctionToDecimalString.cpp
+++ /dev/null
@@ -1,22 +0,0 @@
-#include <Functions/FunctionFactory.h>
-#include <Functions/FunctionToDecimalString.h>
-#include <Functions/IFunction.h>
-
-namespace DB
-{
-
-REGISTER_FUNCTION(ToDecimalString)
-{
-    factory.registerFunction<FunctionToDecimalString>(
-        FunctionDocumentation{
-            .description=R"(
-Returns string representation of a number. First argument is the number of any numeric type,
-second argument is the desired number of digits in fractional part. Returns String.
-
-        )",
-            .examples{{"toDecimalString", "SELECT toDecimalString(2.1456,2)", ""}},
-            .categories{"String"}
-        }, FunctionFactory::CaseInsensitive);
-}
-
-}
diff --git a/src/Functions/FunctionToDecimalString.h b/src/Functions/FunctionToDecimalString.h
deleted file mode 100644
index 6ae007e6b66..00000000000
--- a/src/Functions/FunctionToDecimalString.h
+++ /dev/null
@@ -1,312 +0,0 @@
-#pragma once
-
-#include <Core/Types.h>
-#include <Core/DecimalFunctions.h>
-#include <Functions/IFunction.h>
-#include <Functions/FunctionHelpers.h>
-#include <Columns/ColumnsNumber.h>
-#include <Columns/ColumnString.h>
-#include <Columns/ColumnVector.h>
-#include <Columns/ColumnDecimal.h>
-#include <DataTypes/DataTypeString.h>
-#include <DataTypes/DataTypesNumber.h>
-#include <IO/WriteBufferFromVector.h>
-#include <IO/WriteHelpers.h>
-#include <Interpreters/Context_fwd.h>
-
-namespace DB
-{
-
-namespace ErrorCodes
-{
-    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
-    extern const int ILLEGAL_COLUMN;
-    extern const int CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER;
-}
-
-class FunctionToDecimalString : public IFunction
-{
-public:
-    static constexpr auto name = "toDecimalString";
-    static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionToDecimalString>(); }
-
-    String getName() const override { return name; }
-
-    bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
-
-    size_t getNumberOfArguments() const override { return 2; }
-
-    DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
-    {
-        if (!isNumber(*arguments[0]))
-            throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
-                            "Illegal first argument for formatDecimal function: got {}, expected numeric type",
-                            arguments[0]->getName());
-
-        if (!isUInt8(*arguments[1]))
-            throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
-                            "Illegal second argument for formatDecimal function: got {}, expected UInt8",
-                            arguments[1]->getName());
-
-        return std::make_shared<DataTypeString>();
-    }
-
-    bool useDefaultImplementationForConstants() const override { return true; }
-
-private:
-    /// For operations with Integer/Float
-    template <typename FromVectorType>
-    void vectorConstant(const FromVectorType & vec_from, UInt8 precision,
-                        ColumnString::Chars & vec_to, ColumnString::Offsets & result_offsets) const
-    {
-        size_t input_rows_count = vec_from.size();
-        result_offsets.resize(input_rows_count);
-
-        /// Buffer is used here and in functions below because resulting size cannot be precisely anticipated,
-        /// and buffer resizes on-the-go. Also, .count() provided by buffer is convenient in this case.
-        WriteBufferFromVector<ColumnString::Chars> buf_to(vec_to);
-
-        for (size_t i = 0; i < input_rows_count; ++i)
-        {
-            format(vec_from[i], buf_to, precision);
-            result_offsets[i] = buf_to.count();
-        }
-
-        buf_to.finalize();
-    }
-
-    template <typename FirstArgVectorType>
-    void vectorVector(const FirstArgVectorType & vec_from, const ColumnVector<UInt8>::Container & vec_precision,
-                      ColumnString::Chars & vec_to, ColumnString::Offsets & result_offsets) const
-    {
-        size_t input_rows_count = vec_from.size();
-        result_offsets.resize(input_rows_count);
-
-        WriteBufferFromVector<ColumnString::Chars> buf_to(vec_to);
-
-        constexpr size_t max_digits = std::numeric_limits<UInt256>::digits10;
-
-        for (size_t i = 0; i < input_rows_count; ++i)
-        {
-            if (vec_precision[i] > max_digits)
-                throw DB::Exception(DB::ErrorCodes::CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER,
-                                    "Too many fractional digits requested, shall not be more than {}", max_digits);
-            format(vec_from[i], buf_to, vec_precision[i]);
-            result_offsets[i] = buf_to.count();
-        }
-
-        buf_to.finalize();
-    }
-
-    template <typename FirstArgType>
-    void constantVector(const FirstArgType & value_from, const ColumnVector<UInt8>::Container & vec_precision,
-                        ColumnString::Chars & vec_to, ColumnString::Offsets & result_offsets) const
-    {
-        size_t input_rows_count = vec_precision.size();
-        result_offsets.resize(input_rows_count);
-
-        WriteBufferFromVector<ColumnString::Chars> buf_to(vec_to);
-
-        constexpr size_t max_digits = std::numeric_limits<UInt256>::digits10;
-
-        for (size_t i = 0; i < input_rows_count; ++i)
-        {
-            if (vec_precision[i] > max_digits)
-                throw DB::Exception(DB::ErrorCodes::CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER,
-                                    "Too many fractional digits requested, shall not be more than {}", max_digits);
-            format(value_from, buf_to, vec_precision[i]);
-            result_offsets[i] = buf_to.count();
-        }
-
-        buf_to.finalize();
-    }
-
-    /// For operations with Decimal
-    template <typename FirstArgVectorType>
-    void vectorConstant(const FirstArgVectorType & vec_from, UInt8 precision,
-                        ColumnString::Chars & vec_to, ColumnString::Offsets & result_offsets, UInt8 from_scale) const
-    {
-        /// There are no more than 77 meaning digits (as it is the max length of UInt256). So we can limit it with 77.
-        constexpr size_t max_digits = std::numeric_limits<UInt256>::digits10;
-        if (precision > max_digits)
-            throw DB::Exception(DB::ErrorCodes::CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER,
-                                "Too many fractional digits requested for Decimal, must not be more than {}", max_digits);
-
-        WriteBufferFromVector<ColumnString::Chars> buf_to(vec_to);
-        size_t input_rows_count = vec_from.size();
-        result_offsets.resize(input_rows_count);
-
-        for (size_t i = 0; i < input_rows_count; ++i)
-        {
-            writeText(vec_from[i], from_scale, buf_to, true, true, precision);
-            writeChar(0, buf_to);
-            result_offsets[i] = buf_to.count();
-        }
-        buf_to.finalize();
-    }
-
-    template <typename FirstArgVectorType>
-    void vectorVector(const FirstArgVectorType & vec_from, const ColumnVector<UInt8>::Container & vec_precision,
-                      ColumnString::Chars & vec_to, ColumnString::Offsets & result_offsets, UInt8 from_scale) const
-    {
-        size_t input_rows_count = vec_from.size();
-        result_offsets.resize(input_rows_count);
-
-        WriteBufferFromVector<ColumnString::Chars> buf_to(vec_to);
-
-        constexpr size_t max_digits = std::numeric_limits<UInt256>::digits10;
-
-        for (size_t i = 0; i < input_rows_count; ++i)
-        {
-            if (vec_precision[i] > max_digits)
-                throw DB::Exception(DB::ErrorCodes::CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER,
-                                    "Too many fractional digits requested for Decimal, must not be more than {}", max_digits);
-            writeText(vec_from[i], from_scale, buf_to, true, true, vec_precision[i]);
-            writeChar(0, buf_to);
-            result_offsets[i] = buf_to.count();
-        }
-        buf_to.finalize();
-    }
-
-    template <typename FirstArgType>
-    void constantVector(const FirstArgType & value_from, const ColumnVector<UInt8>::Container & vec_precision,
-                        ColumnString::Chars & vec_to, ColumnString::Offsets & result_offsets, UInt8 from_scale) const
-    {
-        size_t input_rows_count = vec_precision.size();
-        result_offsets.resize(input_rows_count);
-
-        WriteBufferFromVector<ColumnString::Chars> buf_to(vec_to);
-
-        constexpr size_t max_digits = std::numeric_limits<UInt256>::digits10;
-
-        for (size_t i = 0; i < input_rows_count; ++i)
-        {
-            if (vec_precision[i] > max_digits)
-                throw DB::Exception(DB::ErrorCodes::CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER,
-                                    "Too many fractional digits requested for Decimal, must not be more than {}", max_digits);
-            writeText(value_from, from_scale, buf_to, true, true, vec_precision[i]);
-            writeChar(0, buf_to);
-            result_offsets[i] = buf_to.count();
-        }
-        buf_to.finalize();
-    }
-
-    template <is_floating_point T>
-    static void format(T value, DB::WriteBuffer & out, UInt8 precision)
-    {
-        /// Maximum of 60 is hard-coded in 'double-conversion/double-conversion.h' for floating point values,
-        /// Catch this here to give user a more reasonable error.
-        if (precision > 60)
-            throw DB::Exception(DB::ErrorCodes::CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER,
-                                "Too high precision requested for Float, must not be more than 60, got {}", Int8(precision));
-
-        DB::DoubleConverter<false>::BufferType buffer;
-        double_conversion::StringBuilder builder{buffer, sizeof(buffer)};
-
-        const auto result = DB::DoubleConverter<false>::instance().ToFixed(value, precision, &builder);
-
-        if (!result)
-            throw DB::Exception(DB::ErrorCodes::CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER, "Error processing number: {}", value);
-
-        out.write(buffer, builder.position());
-        writeChar(0, out);
-    }
-
-    template <is_integer T>
-    static void format(T value, DB::WriteBuffer & out, UInt8 precision)
-    {
-        /// Fractional part for Integer is just trailing zeros. Let's limit it with 77 (like with Decimals).
-        constexpr size_t max_digits = std::numeric_limits<UInt256>::digits10;
-        if (precision > max_digits)
-            throw DB::Exception(DB::ErrorCodes::CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER,
-                                "Too many fractional digits requested, shall not be more than {}", max_digits);
-        writeText(value, out);
-        if (precision > 0) [[likely]]
-        {
-            writeChar('.', out);
-            for (int i = 0; i < precision; ++i)
-                writeChar('0', out);
-            writeChar(0, out);
-        }
-    }
-
-public:
-    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
-    {
-        switch (arguments[0].type->getTypeId())
-        {
-            case TypeIndex::UInt8:      return executeType<UInt8>(arguments);
-            case TypeIndex::UInt16:     return executeType<UInt16>(arguments);
-            case TypeIndex::UInt32:     return executeType<UInt32>(arguments);
-            case TypeIndex::UInt64:     return executeType<UInt64>(arguments);
-            case TypeIndex::UInt128:    return executeType<UInt128>(arguments);
-            case TypeIndex::UInt256:    return executeType<UInt256>(arguments);
-            case TypeIndex::Int8:       return executeType<Int8>(arguments);
-            case TypeIndex::Int16:      return executeType<Int16>(arguments);
-            case TypeIndex::Int32:      return executeType<Int32>(arguments);
-            case TypeIndex::Int64:      return executeType<Int64>(arguments);
-            case TypeIndex::Int128:     return executeType<Int128>(arguments);
-            case TypeIndex::Int256:     return executeType<Int256>(arguments);
-            case TypeIndex::Float32:    return executeType<Float32>(arguments);
-            case TypeIndex::Float64:    return executeType<Float64>(arguments);
-            case TypeIndex::Decimal32:  return executeType<Decimal32>(arguments);
-            case TypeIndex::Decimal64:  return executeType<Decimal64>(arguments);
-            case TypeIndex::Decimal128: return executeType<Decimal128>(arguments);
-            case TypeIndex::Decimal256: return executeType<Decimal256>(arguments);
-            default:
-                throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of argument of function {}",
-                                arguments[0].column->getName(), getName());
-        }
-    }
-
-private:
-    template <typename T>
-    ColumnPtr executeType(const ColumnsWithTypeAndName & arguments) const
-    {
-        const auto * from_col_const = typeid_cast<const ColumnConst *>(arguments[0].column.get());
-        const auto * precision_col = checkAndGetColumn<ColumnVector<UInt8>>(arguments[1].column.get());
-        const auto * precision_col_const = typeid_cast<const ColumnConst *>(arguments[1].column.get());
-
-        auto result_col = ColumnString::create();
-        auto * result_col_string = assert_cast<ColumnString *>(result_col.get());
-        ColumnString::Chars & result_chars = result_col_string->getChars();
-        ColumnString::Offsets & result_offsets = result_col_string->getOffsets();
-
-        if constexpr (is_decimal<T>)
-        {
-            const auto * from_col = checkAndGetColumn<ColumnDecimal<T>>(arguments[0].column.get());
-            UInt8 from_scale = from_col->getScale();
-
-            if (from_col)
-            {
-                if (precision_col_const)
-                    vectorConstant(from_col->getData(), precision_col_const->template getValue<UInt8>(), result_chars, result_offsets, from_scale);
-                else
-                    vectorVector(from_col->getData(), precision_col->getData(), result_chars, result_offsets, from_scale);
-            }
-            else if (from_col_const)
-                constantVector(from_col_const->template getValue<T>(), precision_col->getData(), result_chars, result_offsets, from_scale);
-            else
-                throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function formatDecimal", arguments[0].column->getName());
-        }
-        else
-        {
-            const auto * from_col = checkAndGetColumn<ColumnVector<T>>(arguments[0].column.get());
-            if (from_col)
-            {
-                if (precision_col_const)
-                    vectorConstant(from_col->getData(), precision_col_const->template getValue<UInt8>(), result_chars, result_offsets);
-                else
-                    vectorVector(from_col->getData(), precision_col->getData(), result_chars, result_offsets);
-            }
-            else if (from_col_const)
-                constantVector(from_col_const->template getValue<T>(), precision_col->getData(), result_chars, result_offsets);
-            else
-                throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function formatDecimal", arguments[0].column->getName());
-        }
-
-        return result_col;
-    }
-};
-
-}
diff --git a/src/IO/WriteHelpers.h b/src/IO/WriteHelpers.h
index aa4c9b17e48..0494cdf22e7 100644
--- a/src/IO/WriteHelpers.h
+++ b/src/IO/WriteHelpers.h
@@ -905,26 +905,26 @@ inline void writeText(const IPv4 & x, WriteBuffer & buf) { writeIPv4Text(x, buf)
 inline void writeText(const IPv6 & x, WriteBuffer & buf) { writeIPv6Text(x, buf); }
 
 template <typename T>
-void writeDecimalFractional(const T & x, UInt32 scale, WriteBuffer & ostr, bool trailing_zeros,
-                            bool fixed_fractional_length, UInt32 fractional_length)
+void writeDecimalFractional(const T & x, UInt32 scale, WriteBuffer & ostr, bool trailing_zeros)
 {
     /// If it's big integer, but the number of digits is small,
     /// use the implementation for smaller integers for more efficient arithmetic.
+
     if constexpr (std::is_same_v<T, Int256>)
     {
         if (x <= std::numeric_limits<UInt32>::max())
         {
-            writeDecimalFractional(static_cast<UInt32>(x), scale, ostr, trailing_zeros, fixed_fractional_length, fractional_length);
+            writeDecimalFractional(static_cast<UInt32>(x), scale, ostr, trailing_zeros);
             return;
         }
         else if (x <= std::numeric_limits<UInt64>::max())
         {
-            writeDecimalFractional(static_cast<UInt64>(x), scale, ostr, trailing_zeros, fixed_fractional_length, fractional_length);
+            writeDecimalFractional(static_cast<UInt64>(x), scale, ostr, trailing_zeros);
             return;
         }
         else if (x <= std::numeric_limits<UInt128>::max())
         {
-            writeDecimalFractional(static_cast<UInt128>(x), scale, ostr, trailing_zeros, fixed_fractional_length, fractional_length);
+            writeDecimalFractional(static_cast<UInt128>(x), scale, ostr, trailing_zeros);
             return;
         }
     }
@@ -932,53 +932,35 @@ void writeDecimalFractional(const T & x, UInt32 scale, WriteBuffer & ostr, bool
     {
         if (x <= std::numeric_limits<UInt32>::max())
         {
-            writeDecimalFractional(static_cast<UInt32>(x), scale, ostr, trailing_zeros, fixed_fractional_length, fractional_length);
+            writeDecimalFractional(static_cast<UInt32>(x), scale, ostr, trailing_zeros);
             return;
         }
         else if (x <= std::numeric_limits<UInt64>::max())
         {
-            writeDecimalFractional(static_cast<UInt64>(x), scale, ostr, trailing_zeros, fixed_fractional_length, fractional_length);
+            writeDecimalFractional(static_cast<UInt64>(x), scale, ostr, trailing_zeros);
             return;
         }
     }
 
     constexpr size_t max_digits = std::numeric_limits<UInt256>::digits10;
     assert(scale <= max_digits);
-    assert(fractional_length <= max_digits);
-
     char buf[max_digits];
-    memset(buf, '0', std::max(scale, fractional_length));
+    memset(buf, '0', scale);
 
     T value = x;
     Int32 last_nonzero_pos = 0;
-
-    if (fixed_fractional_length && fractional_length < scale)
-    {
-        T new_value = value / DecimalUtils::scaleMultiplier<Int256>(scale - fractional_length - 1);
-        auto round_carry = new_value % 10;
-        value = new_value / 10;
-        if (round_carry >= 5)
-            value += 1;
-    }
-
-    for (Int32 pos = fixed_fractional_length ? std::min(scale - 1, fractional_length - 1) : scale - 1; pos >= 0; --pos)
+    for (Int32 pos = scale - 1; pos >= 0; --pos)
     {
         auto remainder = value % 10;
         value /= 10;
-
-        if (remainder != 0 && last_nonzero_pos == 0)
-            last_nonzero_pos = pos;
-
-        buf[pos] += static_cast<char>(remainder);
     }
 
     writeChar('.', ostr);
-    ostr.write(buf, fixed_fractional_length ? fractional_length : (trailing_zeros ? scale : last_nonzero_pos + 1));
+    ostr.write(buf, trailing_zeros ? scale : last_nonzero_pos + 1);
 }
 
 template <typename T>
-void writeText(Decimal<T> x, UInt32 scale, WriteBuffer & ostr, bool trailing_zeros,
-               bool fixed_fractional_length = false, UInt32 fractional_length = 0)
+void writeText(Decimal<T> x, UInt32 scale, WriteBuffer & ostr, bool trailing_zeros)
 {
     T part = DecimalUtils::getWholePart(x, scale);
 
@@ -989,7 +971,7 @@ void writeText(Decimal<T> x, UInt32 scale, WriteBuffer & ostr, bool trailing_zer
 
     writeIntText(part, ostr);
 
-    if (scale || (fixed_fractional_length && fractional_length > 0))
+    if (scale)
     {
         part = DecimalUtils::getFractionalPart(x, scale);
         if (part || trailing_zeros)
@@ -997,7 +979,7 @@ void writeText(Decimal<T> x, UInt32 scale, WriteBuffer & ostr, bool trailing_zer
             if (part < 0)
                 part *= T(-1);
 
-            writeDecimalFractional(part, scale, ostr, trailing_zeros, fixed_fractional_length, fractional_length);
+            writeDecimalFractional(part, scale, ostr, trailing_zeros);
         }
     }
 }
diff --git a/tests/queries/0_stateless/02676_to_decimal_string.reference b/tests/queries/0_stateless/02676_to_decimal_string.reference
deleted file mode 100644
index 4c27ee5b528..00000000000
--- a/tests/queries/0_stateless/02676_to_decimal_string.reference
+++ /dev/null
@@ -1,21 +0,0 @@
-2.00000000000000000000000000000000000000000000000000000000000000000000000000000
-2.12
--2.00000000000000000000000000000000000000000000000000000000000000000000000000000
--2.12
-2.987600000000000033395508580724708735942840576171875000000000
-2.15
--2.987600000000000033395508580724708735942840576171875000000000
--2.15
-64.1230010986
-64.2340000000
--64.1230010986
--64.2340000000
--32.345
-32.34500000000000000000000000000000000000000000000000000000000000000000000000000
-32.46
--64.5671232345
-128.78932312332132985464
--128.78932312332132985464
-128.78932312332132985464000000000000000000000000000000000000000000000000000000000
-128.7893231233
--128.78932312332132985464123123789323123321329854600000000000000000000000000000000
diff --git a/tests/queries/0_stateless/02676_to_decimal_string.sql b/tests/queries/0_stateless/02676_to_decimal_string.sql
deleted file mode 100644
index 563d60c62c7..00000000000
--- a/tests/queries/0_stateless/02676_to_decimal_string.sql
+++ /dev/null
@@ -1,35 +0,0 @@
--- Regular types
-SELECT toDecimalString(2, 77);  -- more digits required than exist
-SELECT toDecimalString(2.123456, 2);  -- rounding
-SELECT toDecimalString(-2, 77);  -- more digits required than exist
-SELECT toDecimalString(-2.123456, 2);  -- rounding
-
-SELECT toDecimalString(2.9876, 60);  -- more digits required than exist (took 60 as it is float by default)
-SELECT toDecimalString(2.1456, 2);  -- rounding
-SELECT toDecimalString(-2.9876, 60);  -- more digits required than exist
-SELECT toDecimalString(-2.1456, 2);  -- rounding
-
--- Float32 and Float64 tests. No sense to test big float precision -- the result will be a mess anyway.
-SELECT toDecimalString(64.123::Float32, 10);
-SELECT toDecimalString(64.234::Float64, 10);
-SELECT toDecimalString(-64.123::Float32, 10);
-SELECT toDecimalString(-64.234::Float64, 10);
-
--- Decimals
-SELECT toDecimalString(-32.345::Decimal32(3), 3);
-SELECT toDecimalString(32.345::Decimal32(3), 77);  -- more digits required than exist
-SELECT toDecimalString(32.456::Decimal32(3), 2);  -- rounding
-SELECT toDecimalString('-64.5671232345'::Decimal64(10), 10);
-SELECT toDecimalString('128.78932312332132985464'::Decimal128(20), 20);
-SELECT toDecimalString('-128.78932312332132985464123123'::Decimal128(26), 20);  -- rounding
-SELECT toDecimalString('128.78932312332132985464'::Decimal128(20), 77);  -- more digits required than exist
-SELECT toDecimalString('128.789323123321329854641231237893231233213298546'::Decimal256(45), 10);  -- rounding
-SELECT toDecimalString('-128.789323123321329854641231237893231233213298546'::Decimal256(45), 77);  -- more digits required than exist
-
--- Max number of decimal fractional digits is defined as 77 for Int/UInt/Decimal and 60 for Float.
--- These values shall work OK.
-SELECT toDecimalString('32.32'::Float32, 61); -- {serverError CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER}
-SELECT toDecimalString('64.64'::Float64, 61); -- {serverError CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER}
-SELECT toDecimalString('88'::UInt8, 78); -- {serverError CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER}
-SELECT toDecimalString('646464'::Int256, 78); -- {serverError CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER}
-SELECT toDecimalString('-128.789323123321329854641231237893231233213298546'::Decimal256(45), 78); -- {serverError CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER}

From cda42e6dd4eaa56822ad64aad7aa09f632547d93 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@clickhouse.com>
Date: Sat, 22 Jul 2023 05:05:51 +0200
Subject: [PATCH 176/242] Add a test

---
 .../queries/0_stateless/02831_regexp_analyze_recursion.reference | 0
 tests/queries/0_stateless/02831_regexp_analyze_recursion.sql     | 1 +
 2 files changed, 1 insertion(+)
 create mode 100644 tests/queries/0_stateless/02831_regexp_analyze_recursion.reference
 create mode 100644 tests/queries/0_stateless/02831_regexp_analyze_recursion.sql

diff --git a/tests/queries/0_stateless/02831_regexp_analyze_recursion.reference b/tests/queries/0_stateless/02831_regexp_analyze_recursion.reference
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/tests/queries/0_stateless/02831_regexp_analyze_recursion.sql b/tests/queries/0_stateless/02831_regexp_analyze_recursion.sql
new file mode 100644
index 00000000000..018d1f031e6
--- /dev/null
+++ b/tests/queries/0_stateless/02831_regexp_analyze_recursion.sql
@@ -0,0 +1 @@
+SELECT match('', repeat('(', 100000)); -- { serverError 306 }

From 21ffce0ff20fc7f136d8d5b05369a1abcdc01be3 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@clickhouse.com>
Date: Sat, 22 Jul 2023 05:06:02 +0200
Subject: [PATCH 177/242] Check regular expression depth

---
 src/Common/OptimizedRegularExpression.cpp | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/Common/OptimizedRegularExpression.cpp b/src/Common/OptimizedRegularExpression.cpp
index c542945c78d..0b80e2f3f97 100644
--- a/src/Common/OptimizedRegularExpression.cpp
+++ b/src/Common/OptimizedRegularExpression.cpp
@@ -1,6 +1,7 @@
 #include <limits>
 #include <Common/Exception.h>
 #include <Common/PODArray.h>
+#include <Common/checkStackSize.h>
 #include <Common/OptimizedRegularExpression.h>
 
 #define MIN_LENGTH_FOR_STRSTR 3
@@ -50,6 +51,8 @@ const char * analyzeImpl(
     bool & is_trivial,
     Literals & global_alternatives)
 {
+    checkStackSize();
+
     /** The expression is trivial if all the metacharacters in it are escaped.
       * The non-alternative string is
       *  a string outside parentheses,

From de2016261ef32878456de9efae5cfab748611853 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@clickhouse.com>
Date: Sat, 22 Jul 2023 05:08:27 +0200
Subject: [PATCH 178/242] Get rid of it

---
 docker/test/fasttest/run.sh | 1 -
 1 file changed, 1 deletion(-)

diff --git a/docker/test/fasttest/run.sh b/docker/test/fasttest/run.sh
index 828c73e6781..e25b5fdbfed 100755
--- a/docker/test/fasttest/run.sh
+++ b/docker/test/fasttest/run.sh
@@ -141,7 +141,6 @@ function clone_submodules
             contrib/jemalloc
             contrib/replxx
             contrib/wyhash
-            contrib/hashidsxx
             contrib/c-ares
             contrib/morton-nd
             contrib/xxHash

From 20625d75ab52319b8e67e50d2df803d0e2dc0934 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Thu, 15 Jun 2023 14:08:43 +0200
Subject: [PATCH 179/242] Fix optimize_skip_unused_shards with JOINs

In case of a JOIN, the query may contain conditions for other tables, while
optimize_skip_unused_shards was pretty dumb and failed to skip such
columns.

Fix this by removing JOIN before applying this optimization.

v2: restriction for analyzer
v3: ignore 01940_custom_tld_sharding_key under analyzer
Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com>
Co-Authored-By: Alexey Milovidov <milovidov@clickhouse.com>
---
 src/Storages/StorageDistributed.cpp           | 42 ++++++++------
 src/Storages/StorageDistributed.h             |  4 +-
 tests/analyzer_tech_debt.txt                  |  2 +
 ...optimize_skip_unused_shards_join.reference |  0
 ...02790_optimize_skip_unused_shards_join.sql | 55 +++++++++++++++++++
 5 files changed, 84 insertions(+), 19 deletions(-)
 create mode 100644 tests/queries/0_stateless/02790_optimize_skip_unused_shards_join.reference
 create mode 100644 tests/queries/0_stateless/02790_optimize_skip_unused_shards_join.sql

diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp
index 0727658160c..1a99d272cab 100644
--- a/src/Storages/StorageDistributed.cpp
+++ b/src/Storages/StorageDistributed.cpp
@@ -75,6 +75,7 @@
 #include <Interpreters/getTableExpressions.h>
 #include <Interpreters/RequiredSourceColumnsVisitor.h>
 #include <Interpreters/getCustomKeyFilterForParallelReplicas.h>
+#include <Interpreters/getHeaderForProcessingStage.h>
 
 #include <Functions/IFunction.h>
 #include <Functions/FunctionFactory.h>
@@ -434,7 +435,7 @@ QueryProcessingStage::Enum StorageDistributed::getQueryProcessingStage(
         {
             /// Always calculate optimized cluster here, to avoid conditions during read()
             /// (Anyway it will be calculated in the read())
-            ClusterPtr optimized_cluster = getOptimizedCluster(local_context, storage_snapshot, query_info.query);
+            ClusterPtr optimized_cluster = getOptimizedCluster(local_context, storage_snapshot, query_info);
             if (optimized_cluster)
             {
                 LOG_DEBUG(log, "Skipping irrelevant shards - the query will be sent to the following shards of the cluster (shard numbers): {}",
@@ -1297,7 +1298,7 @@ ClusterPtr StorageDistributed::getCluster() const
 }
 
 ClusterPtr StorageDistributed::getOptimizedCluster(
-    ContextPtr local_context, const StorageSnapshotPtr & storage_snapshot, const ASTPtr & query_ptr) const
+    ContextPtr local_context, const StorageSnapshotPtr & storage_snapshot, const SelectQueryInfo & query_info) const
 {
     ClusterPtr cluster = getCluster();
     const Settings & settings = local_context->getSettingsRef();
@@ -1306,7 +1307,7 @@ ClusterPtr StorageDistributed::getOptimizedCluster(
 
     if (has_sharding_key && sharding_key_is_usable)
     {
-        ClusterPtr optimized = skipUnusedShards(cluster, query_ptr, storage_snapshot, local_context);
+        ClusterPtr optimized = skipUnusedShards(cluster, query_info, storage_snapshot, local_context);
         if (optimized)
             return optimized;
     }
@@ -1355,25 +1356,34 @@ IColumn::Selector StorageDistributed::createSelector(const ClusterPtr cluster, c
 /// using constraints from "PREWHERE" and "WHERE" conditions, otherwise returns `nullptr`
 ClusterPtr StorageDistributed::skipUnusedShards(
     ClusterPtr cluster,
-    const ASTPtr & query_ptr,
+    const SelectQueryInfo & query_info,
     const StorageSnapshotPtr & storage_snapshot,
     ContextPtr local_context) const
 {
-    const auto & select = query_ptr->as<ASTSelectQuery &>();
-
+    const auto & select = query_info.query->as<ASTSelectQuery &>();
     if (!select.prewhere() && !select.where())
-    {
         return nullptr;
-    }
+
+    /// FIXME: support analyzer
+    if (!query_info.syntax_analyzer_result)
+        return nullptr;
 
     ASTPtr condition_ast;
-    if (select.prewhere() && select.where())
+    /// Remove JOIN from the query since it may contain a condition for other tables.
+    /// But only the conditions for the left table should be analyzed for shard skipping.
     {
-        condition_ast = makeASTFunction("and", select.prewhere()->clone(), select.where()->clone());
-    }
-    else
-    {
-        condition_ast = select.prewhere() ? select.prewhere()->clone() : select.where()->clone();
+        ASTPtr select_without_join_ptr = select.clone();
+        ASTSelectQuery select_without_join = select_without_join_ptr->as<ASTSelectQuery &>();
+        TreeRewriterResult analyzer_result_without_join = *query_info.syntax_analyzer_result;
+
+        removeJoin(select_without_join, analyzer_result_without_join, local_context);
+        if (!select_without_join.prewhere() && !select_without_join.where())
+            return nullptr;
+
+        if (select_without_join.prewhere() && select_without_join.where())
+            condition_ast = makeASTFunction("and", select_without_join.prewhere()->clone(), select_without_join.where()->clone());
+        else
+            condition_ast = select_without_join.prewhere() ? select_without_join.prewhere()->clone() : select_without_join.where()->clone();
     }
 
     replaceConstantExpressions(condition_ast, local_context, storage_snapshot->metadata->getColumns().getAll(), shared_from_this(), storage_snapshot);
@@ -1396,11 +1406,9 @@ ClusterPtr StorageDistributed::skipUnusedShards(
         return nullptr;
     }
 
-    // Can't get definite answer if we can skip any shards
+    // Can't get a definite answer if we can skip any shards
     if (!blocks)
-    {
         return nullptr;
-    }
 
     std::set<int> shards;
 
diff --git a/src/Storages/StorageDistributed.h b/src/Storages/StorageDistributed.h
index f45286341cf..615d6e337b6 100644
--- a/src/Storages/StorageDistributed.h
+++ b/src/Storages/StorageDistributed.h
@@ -182,10 +182,10 @@ private:
     /// Apply the following settings:
     /// - optimize_skip_unused_shards
     /// - force_optimize_skip_unused_shards
-    ClusterPtr getOptimizedCluster(ContextPtr, const StorageSnapshotPtr & storage_snapshot, const ASTPtr & query_ptr) const;
+    ClusterPtr getOptimizedCluster(ContextPtr, const StorageSnapshotPtr & storage_snapshot, const SelectQueryInfo & query_info) const;
 
     ClusterPtr skipUnusedShards(
-        ClusterPtr cluster, const ASTPtr & query_ptr, const StorageSnapshotPtr & storage_snapshot, ContextPtr context) const;
+        ClusterPtr cluster, const SelectQueryInfo & query_info, const StorageSnapshotPtr & storage_snapshot, ContextPtr context) const;
 
     /// This method returns optimal query processing stage.
     ///
diff --git a/tests/analyzer_tech_debt.txt b/tests/analyzer_tech_debt.txt
index e0f259306aa..8ffb94e17b8 100644
--- a/tests/analyzer_tech_debt.txt
+++ b/tests/analyzer_tech_debt.txt
@@ -130,3 +130,5 @@
 02581_share_big_sets_between_mutation_tasks_long
 02581_share_big_sets_between_multiple_mutations_tasks_long
 00992_system_parts_race_condition_zookeeper_long
+02790_optimize_skip_unused_shards_join
+01940_custom_tld_sharding_key
diff --git a/tests/queries/0_stateless/02790_optimize_skip_unused_shards_join.reference b/tests/queries/0_stateless/02790_optimize_skip_unused_shards_join.reference
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/tests/queries/0_stateless/02790_optimize_skip_unused_shards_join.sql b/tests/queries/0_stateless/02790_optimize_skip_unused_shards_join.sql
new file mode 100644
index 00000000000..0773e0a9a5e
--- /dev/null
+++ b/tests/queries/0_stateless/02790_optimize_skip_unused_shards_join.sql
@@ -0,0 +1,55 @@
+-- Issue: https://github.com/ClickHouse/ClickHouse/issues/15995
+
+DROP TABLE IF EXISTS outer;
+DROP TABLE IF EXISTS inner;
+
+DROP TABLE IF EXISTS outer_distributed;
+DROP TABLE IF EXISTS inner_distributed;
+
+CREATE TABLE IF NOT EXISTS outer
+(
+    `id` UInt64,
+    `organization_id` UInt64,
+    `version` UInt64
+)
+ENGINE = ReplacingMergeTree(version)
+PARTITION BY organization_id % 8
+ORDER BY (organization_id, id);
+
+CREATE TABLE inner
+(
+    `id` UInt64,
+    `outer_id` UInt64,
+    `organization_id` UInt64,
+    `version` UInt64,
+    `date` Date
+)
+ENGINE = ReplacingMergeTree(version)
+PARTITION BY toYYYYMM(date)
+ORDER BY (organization_id, outer_id);
+
+CREATE TABLE inner_distributed AS inner
+ENGINE = Distributed('test_cluster_two_shards', currentDatabase(), 'inner', intHash64(organization_id));
+
+CREATE TABLE outer_distributed AS outer
+ENGINE = Distributed('test_cluster_two_shards', currentDatabase(), 'outer', intHash64(organization_id));
+
+SELECT
+    sum(if(inner_distributed.id != 0, 1, 0)) AS total,
+    inner_distributed.date AS date
+FROM outer_distributed AS outer_distributed
+FINAL
+LEFT JOIN
+(
+    SELECT
+        inner_distributed.outer_id AS outer_id,
+        inner_distributed.id AS id,
+        inner_distributed.date AS date
+    FROM inner_distributed AS inner_distributed
+    FINAL
+    WHERE inner_distributed.organization_id = 15078
+) AS inner_distributed ON inner_distributed.outer_id = outer_distributed.id
+WHERE (outer_distributed.organization_id = 15078) AND (date != toDate('1970-01-01'))
+GROUP BY date
+ORDER BY date DESC
+SETTINGS distributed_product_mode = 'local', optimize_skip_unused_shards = 1;

From c3c6560c9511163fc14eac2be8f89c30d3bcce1d Mon Sep 17 00:00:00 2001
From: vdimir <vdimir@clickhouse.com>
Date: Sat, 22 Jul 2023 08:44:19 +0000
Subject: [PATCH 180/242] Add 02815_range_dict_no_direct_join to
 analyzer_tech_debt.txt

---
 tests/analyzer_tech_debt.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/analyzer_tech_debt.txt b/tests/analyzer_tech_debt.txt
index e0f259306aa..19b90a39800 100644
--- a/tests/analyzer_tech_debt.txt
+++ b/tests/analyzer_tech_debt.txt
@@ -130,3 +130,4 @@
 02581_share_big_sets_between_mutation_tasks_long
 02581_share_big_sets_between_multiple_mutations_tasks_long
 00992_system_parts_race_condition_zookeeper_long
+02815_range_dict_no_direct_join

From 5ca6c97832f786e6e3be085e3ec79829f9233cdd Mon Sep 17 00:00:00 2001
From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com>
Date: Sat, 22 Jul 2023 12:03:20 +0200
Subject: [PATCH 181/242] Update gtest_lru_file_cache.cpp

---
 src/Interpreters/tests/gtest_lru_file_cache.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/Interpreters/tests/gtest_lru_file_cache.cpp b/src/Interpreters/tests/gtest_lru_file_cache.cpp
index 12e7d9372f7..dab14a66ed7 100644
--- a/src/Interpreters/tests/gtest_lru_file_cache.cpp
+++ b/src/Interpreters/tests/gtest_lru_file_cache.cpp
@@ -489,7 +489,6 @@ TEST_F(FileCacheTest, get)
 
             download(file_segment);
             ASSERT_EQ(file_segment.state(), State::DOWNLOADED);
-            file_segment.completePartAndResetDownloader();
 
             other_1.join();
 

From 363201270c00f0ebfa61e80471b372f434370380 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@clickhouse.com>
Date: Sat, 22 Jul 2023 17:49:53 +0300
Subject: [PATCH 182/242] Update 01710_query_log_with_projection_info.sql

---
 .../0_stateless/01710_query_log_with_projection_info.sql        | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tests/queries/0_stateless/01710_query_log_with_projection_info.sql b/tests/queries/0_stateless/01710_query_log_with_projection_info.sql
index 25e7e8fed60..cd84b392fe5 100644
--- a/tests/queries/0_stateless/01710_query_log_with_projection_info.sql
+++ b/tests/queries/0_stateless/01710_query_log_with_projection_info.sql
@@ -62,3 +62,5 @@ FROM
     system.query_log
 WHERE
     current_database=currentDatabase() and query = 'SELECT min(id) FROM t FORMAT Null;';
+
+DROP TABLE t;

From dab954a92d7893a7ebbef9cda0a3aedf63a96a50 Mon Sep 17 00:00:00 2001
From: Han Fei <hanfei19910905@gmail.com>
Date: Sat, 22 Jul 2023 18:10:54 +0200
Subject: [PATCH 183/242] do not throw exception in
 OptimizedRegularExpressionImpl::analyze

---
 src/Common/OptimizedRegularExpression.cpp           | 13 +++++++++++--
 .../0_stateless/02831_regexp_analyze_recursion.sql  |  2 +-
 2 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/src/Common/OptimizedRegularExpression.cpp b/src/Common/OptimizedRegularExpression.cpp
index 0b80e2f3f97..918ebd75fc0 100644
--- a/src/Common/OptimizedRegularExpression.cpp
+++ b/src/Common/OptimizedRegularExpression.cpp
@@ -423,6 +423,7 @@ void OptimizedRegularExpressionImpl<thread_safe>::analyze(
         bool & is_trivial,
         bool & required_substring_is_prefix,
         std::vector<std::string> & alternatives)
+try
 {
     Literals alternative_literals;
     Literal required_literal;
@@ -432,12 +433,20 @@ void OptimizedRegularExpressionImpl<thread_safe>::analyze(
     for (auto & lit : alternative_literals)
         alternatives.push_back(std::move(lit.literal));
 }
+catch(...)
+{
+    required_substring = "";
+    is_trivial = false;
+    required_substring_is_prefix = false;
+    alternatives.clear();
+    std::cerr << "Analyze RegularExpression failed, got error: {}" << DB::getCurrentExceptionMessage(false) << "\n";
+}
 
 template <bool thread_safe>
 OptimizedRegularExpressionImpl<thread_safe>::OptimizedRegularExpressionImpl(const std::string & regexp_, int options)
 {
-    std::vector<std::string> alternativesDummy; /// this vector extracts patterns a,b,c from pattern (a|b|c). for now it's not used.
-    analyze(regexp_, required_substring, is_trivial, required_substring_is_prefix, alternativesDummy);
+    std::vector<std::string> alternatives_dummy; /// this vector extracts patterns a,b,c from pattern (a|b|c). for now it's not used.
+    analyze(regexp_, required_substring, is_trivial, required_substring_is_prefix, alternatives_dummy);
 
 
     /// Just three following options are supported
diff --git a/tests/queries/0_stateless/02831_regexp_analyze_recursion.sql b/tests/queries/0_stateless/02831_regexp_analyze_recursion.sql
index 018d1f031e6..a2075ae903b 100644
--- a/tests/queries/0_stateless/02831_regexp_analyze_recursion.sql
+++ b/tests/queries/0_stateless/02831_regexp_analyze_recursion.sql
@@ -1 +1 @@
-SELECT match('', repeat('(', 100000)); -- { serverError 306 }
+SELECT match('', repeat('(', 100000)); -- { serverError 427 }

From c60090ccbd30143d44ab715b8b7b5e0060a2095f Mon Sep 17 00:00:00 2001
From: Igor Nikonov <igor@clickhouse.com>
Date: Sat, 22 Jul 2023 17:43:22 +0000
Subject: [PATCH 184/242] Add test with materialize() + fix

---
 ...2810_fix_remove_dedundant_distinct_view.reference | 12 ++++++++++--
 .../02810_fix_remove_dedundant_distinct_view.sql     | 10 +++++++++-
 2 files changed, 19 insertions(+), 3 deletions(-)

diff --git a/tests/queries/0_stateless/02810_fix_remove_dedundant_distinct_view.reference b/tests/queries/0_stateless/02810_fix_remove_dedundant_distinct_view.reference
index 01f14f82e94..ec714a5df07 100644
--- a/tests/queries/0_stateless/02810_fix_remove_dedundant_distinct_view.reference
+++ b/tests/queries/0_stateless/02810_fix_remove_dedundant_distinct_view.reference
@@ -8,6 +8,14 @@ FROM
 )
 WHERE explain ILIKE '%distinct%';
 2
-SELECT DISTINCT x FROM tab_v;
-2
+SELECT DISTINCT x FROM tab_v ORDER BY x;
 1
+2
+-- explicitly checking that materialize() doesn't affect the result, - redundant DISTINCT is still removed
+SELECT count()
+FROM
+(
+    EXPLAIN SELECT DISTINCT x FROM (SELECT materialize(x) as x FROM (select DISTINCT x from tab))
+)
+WHERE explain ILIKE '%distinct%';
+2
diff --git a/tests/queries/0_stateless/02810_fix_remove_dedundant_distinct_view.sql b/tests/queries/0_stateless/02810_fix_remove_dedundant_distinct_view.sql
index 99fc24dae8b..ca0a2edd99d 100644
--- a/tests/queries/0_stateless/02810_fix_remove_dedundant_distinct_view.sql
+++ b/tests/queries/0_stateless/02810_fix_remove_dedundant_distinct_view.sql
@@ -19,4 +19,12 @@ FROM
 )
 WHERE explain ILIKE '%distinct%';
 
-SELECT DISTINCT x FROM tab_v;
+SELECT DISTINCT x FROM tab_v ORDER BY x;
+
+-- explicitly checking that materialize() doesn't affect the result, - redundant DISTINCT is still removed
+SELECT count()
+FROM
+(
+    EXPLAIN SELECT DISTINCT x FROM (SELECT materialize(x) as x FROM (select DISTINCT x from tab))
+)
+WHERE explain ILIKE '%distinct%';

From afdda489bdfb27d1db2a7554223f5dfcb8cca7b1 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@clickhouse.com>
Date: Sat, 22 Jul 2023 19:53:16 +0200
Subject: [PATCH 185/242] Fix test

---
 .../02790_sql_standard_fetch.reference        | 72 +++++++++----------
 .../0_stateless/02790_sql_standard_fetch.sql  | 12 ++--
 2 files changed, 42 insertions(+), 42 deletions(-)

diff --git a/tests/queries/0_stateless/02790_sql_standard_fetch.reference b/tests/queries/0_stateless/02790_sql_standard_fetch.reference
index 429eecbc936..270af6e5c17 100644
--- a/tests/queries/0_stateless/02790_sql_standard_fetch.reference
+++ b/tests/queries/0_stateless/02790_sql_standard_fetch.reference
@@ -1,36 +1,36 @@
-┌─id─┬─name──┬─department─┬─salary─┐
-│ 25 │ Frank │ it         │    120 │
-│ 23 │ Henry │ it         │    104 │
-│ 24 │ Irene │ it         │    104 │
-│ 33 │ Alice │ sales      │    100 │
-│ 32 │ Dave  │ sales      │     96 │
-└────┴───────┴────────────┴────────┘
-┌─id─┬─name──┬─department─┬─salary─┐
-│ 25 │ Frank │ it         │    120 │
-│ 23 │ Henry │ it         │    104 │
-│ 24 │ Irene │ it         │    104 │
-│ 33 │ Alice │ sales      │    100 │
-│ 32 │ Dave  │ sales      │     96 │
-└────┴───────┴────────────┴────────┘
-┌─id─┬─name──┬─department─┬─salary─┐
-│ 25 │ Frank │ it         │    120 │
-│ 23 │ Henry │ it         │    104 │
-│ 24 │ Irene │ it         │    104 │
-│ 33 │ Alice │ sales      │    100 │
-│ 31 │ Cindy │ sales      │     96 │
-│ 32 │ Dave  │ sales      │     96 │
-└────┴───────┴────────────┴────────┘
-┌─id─┬─name──┬─department─┬─salary─┐
-│ 33 │ Alice │ sales      │    100 │
-│ 31 │ Cindy │ sales      │     96 │
-│ 32 │ Dave  │ sales      │     96 │
-│ 22 │ Grace │ it         │     90 │
-│ 21 │ Emma  │ it         │     84 │
-└────┴───────┴────────────┴────────┘
-┌─id─┬─name──┬─department─┬─salary─┐
-│ 33 │ Alice │ sales      │    100 │
-│ 31 │ Cindy │ sales      │     96 │
-│ 32 │ Dave  │ sales      │     96 │
-│ 22 │ Grace │ it         │     90 │
-│ 21 │ Emma  │ it         │     84 │
-└────┴───────┴────────────┴────────┘
+┌─id─┬─name───────────┬─department─┬─salary─┐
+│ 25 │ Frank          │ it         │    120 │
+│ 23 │ Henry or Irene │ it         │    104 │
+│ 24 │ Henry or Irene │ it         │    104 │
+│ 33 │ Alice          │ sales      │    100 │
+│ 32 │ Dave or Cindy  │ sales      │     96 │
+└────┴────────────────┴────────────┴────────┘
+┌─id─┬─name───────────┬─department─┬─salary─┐
+│ 25 │ Frank          │ it         │    120 │
+│ 23 │ Henry or Irene │ it         │    104 │
+│ 24 │ Henry or Irene │ it         │    104 │
+│ 33 │ Alice          │ sales      │    100 │
+│ 32 │ Dave or Cindy  │ sales      │     96 │
+└────┴────────────────┴────────────┴────────┘
+┌─id─┬─name───────────┬─department─┬─salary─┐
+│ 25 │ Frank          │ it         │    120 │
+│ 23 │ Henry or Irene │ it         │    104 │
+│ 24 │ Henry or Irene │ it         │    104 │
+│ 33 │ Alice          │ sales      │    100 │
+│ 31 │ Dave or Cindy  │ sales      │     96 │
+│ 32 │ Dave or Cindy  │ sales      │     96 │
+└────┴────────────────┴────────────┴────────┘
+┌─id─┬─name──────────┬─department─┬─salary─┐
+│ 33 │ Alice         │ sales      │    100 │
+│ 31 │ Dave or Cindy │ sales      │     96 │
+│ 32 │ Dave or Cindy │ sales      │     96 │
+│ 22 │ Grace         │ it         │     90 │
+│ 21 │ Emma          │ it         │     84 │
+└────┴───────────────┴────────────┴────────┘
+┌─id─┬─name──────────┬─department─┬─salary─┐
+│ 33 │ Alice         │ sales      │    100 │
+│ 31 │ Dave or Cindy │ sales      │     96 │
+│ 32 │ Dave or Cindy │ sales      │     96 │
+│ 22 │ Grace         │ it         │     90 │
+│ 21 │ Emma          │ it         │     84 │
+└────┴───────────────┴────────────┴────────┘
diff --git a/tests/queries/0_stateless/02790_sql_standard_fetch.sql b/tests/queries/0_stateless/02790_sql_standard_fetch.sql
index 4204279a746..07a806eddf9 100644
--- a/tests/queries/0_stateless/02790_sql_standard_fetch.sql
+++ b/tests/queries/0_stateless/02790_sql_standard_fetch.sql
@@ -1,33 +1,33 @@
 -- https://antonz.org/sql-fetch/
 
 CREATE TEMPORARY TABLE employees (id UInt64, name String, department String, salary UInt64);
-INSERT INTO employees VALUES (23, 'Henry', 'it', 104), (24, 'Irene', 'it', 104), (25, 'Frank', 'it', 120), (31, 'Cindy', 'sales', 96), (33, 'Alice', 'sales', 100), (32, 'Dave', 'sales', 96), (22, 'Grace', 'it', 90), (21, 'Emma', 'it', '84');
+INSERT INTO employees VALUES (23, 'Henry', 'it', 104), (24, 'Irene', 'it', 104), (25, 'Frank', 'it', 120), (31, 'Cindy', 'sales', 96), (33, 'Alice', 'sales', 100), (32, 'Dave', 'sales', 96), (22, 'Grace', 'it', 90), (21, 'Emma', 'it', 84);
 
 -- Determinism
 SET max_threads = 1, parallelize_output_from_storages = 0;
 
-select * from (SELECT * FROM employees ORDER BY id, name, department, salary)
+select id, transform(name, ['Henry', 'Irene', 'Dave', 'Cindy'], ['Henry or Irene', 'Henry or Irene', 'Dave or Cindy', 'Dave or Cindy']) AS name, department, salary from (SELECT * FROM employees ORDER BY id, name, department, salary)
 order by salary desc
 limit 5
 format PrettyCompactNoEscapes;
 
-select * from (SELECT * FROM employees ORDER BY id, name, department, salary)
+select id, transform(name, ['Henry', 'Irene', 'Dave', 'Cindy'], ['Henry or Irene', 'Henry or Irene', 'Dave or Cindy', 'Dave or Cindy']) AS name, department, salary from (SELECT * FROM employees ORDER BY id, name, department, salary)
 order by salary desc
 fetch first 5 rows only
 format PrettyCompactNoEscapes;
 
-select * from (SELECT * FROM employees ORDER BY id, name, department, salary)
+select id, transform(name, ['Henry', 'Irene', 'Dave', 'Cindy'], ['Henry or Irene', 'Henry or Irene', 'Dave or Cindy', 'Dave or Cindy']) AS name, department, salary from (SELECT * FROM employees ORDER BY id, name, department, salary)
 order by salary desc
 fetch first 5 rows with ties
 format PrettyCompactNoEscapes;
 
-select * from (SELECT * FROM employees ORDER BY id, name, department, salary)
+select id, transform(name, ['Henry', 'Irene', 'Dave', 'Cindy'], ['Henry or Irene', 'Henry or Irene', 'Dave or Cindy', 'Dave or Cindy']) AS name, department, salary from (SELECT * FROM employees ORDER BY id, name, department, salary)
 order by salary desc
 offset 3 rows
 fetch next 5 rows only
 format PrettyCompactNoEscapes;
 
-select * from (SELECT * FROM employees ORDER BY id, name, department, salary)
+select id, transform(name, ['Henry', 'Irene', 'Dave', 'Cindy'], ['Henry or Irene', 'Henry or Irene', 'Dave or Cindy', 'Dave or Cindy']) AS name, department, salary from (SELECT * FROM employees ORDER BY id, name, department, salary)
 order by salary desc
 offset 3 rows
 fetch first 5 rows only

From 7a24de801d93957cd87e8a1d2f726b934912b038 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@clickhouse.com>
Date: Sat, 22 Jul 2023 19:53:45 +0200
Subject: [PATCH 186/242] Fix test

---
 .../02790_sql_standard_fetch.reference        | 72 +++++++++----------
 .../0_stateless/02790_sql_standard_fetch.sql  | 10 +--
 2 files changed, 41 insertions(+), 41 deletions(-)

diff --git a/tests/queries/0_stateless/02790_sql_standard_fetch.reference b/tests/queries/0_stateless/02790_sql_standard_fetch.reference
index 270af6e5c17..ef578b526da 100644
--- a/tests/queries/0_stateless/02790_sql_standard_fetch.reference
+++ b/tests/queries/0_stateless/02790_sql_standard_fetch.reference
@@ -1,36 +1,36 @@
-┌─id─┬─name───────────┬─department─┬─salary─┐
-│ 25 │ Frank          │ it         │    120 │
-│ 23 │ Henry or Irene │ it         │    104 │
-│ 24 │ Henry or Irene │ it         │    104 │
-│ 33 │ Alice          │ sales      │    100 │
-│ 32 │ Dave or Cindy  │ sales      │     96 │
-└────┴────────────────┴────────────┴────────┘
-┌─id─┬─name───────────┬─department─┬─salary─┐
-│ 25 │ Frank          │ it         │    120 │
-│ 23 │ Henry or Irene │ it         │    104 │
-│ 24 │ Henry or Irene │ it         │    104 │
-│ 33 │ Alice          │ sales      │    100 │
-│ 32 │ Dave or Cindy  │ sales      │     96 │
-└────┴────────────────┴────────────┴────────┘
-┌─id─┬─name───────────┬─department─┬─salary─┐
-│ 25 │ Frank          │ it         │    120 │
-│ 23 │ Henry or Irene │ it         │    104 │
-│ 24 │ Henry or Irene │ it         │    104 │
-│ 33 │ Alice          │ sales      │    100 │
-│ 31 │ Dave or Cindy  │ sales      │     96 │
-│ 32 │ Dave or Cindy  │ sales      │     96 │
-└────┴────────────────┴────────────┴────────┘
-┌─id─┬─name──────────┬─department─┬─salary─┐
-│ 33 │ Alice         │ sales      │    100 │
-│ 31 │ Dave or Cindy │ sales      │     96 │
-│ 32 │ Dave or Cindy │ sales      │     96 │
-│ 22 │ Grace         │ it         │     90 │
-│ 21 │ Emma          │ it         │     84 │
-└────┴───────────────┴────────────┴────────┘
-┌─id─┬─name──────────┬─department─┬─salary─┐
-│ 33 │ Alice         │ sales      │    100 │
-│ 31 │ Dave or Cindy │ sales      │     96 │
-│ 32 │ Dave or Cindy │ sales      │     96 │
-│ 22 │ Grace         │ it         │     90 │
-│ 21 │ Emma          │ it         │     84 │
-└────┴───────────────┴────────────┴────────┘
+┌─name───────────┬─department─┬─salary─┐
+│ Frank          │ it         │    120 │
+│ Henry or Irene │ it         │    104 │
+│ Henry or Irene │ it         │    104 │
+│ Alice          │ sales      │    100 │
+│ Dave or Cindy  │ sales      │     96 │
+└────────────────┴────────────┴────────┘
+┌─name───────────┬─department─┬─salary─┐
+│ Frank          │ it         │    120 │
+│ Henry or Irene │ it         │    104 │
+│ Henry or Irene │ it         │    104 │
+│ Alice          │ sales      │    100 │
+│ Dave or Cindy  │ sales      │     96 │
+└────────────────┴────────────┴────────┘
+┌─name───────────┬─department─┬─salary─┐
+│ Frank          │ it         │    120 │
+│ Henry or Irene │ it         │    104 │
+│ Henry or Irene │ it         │    104 │
+│ Alice          │ sales      │    100 │
+│ Dave or Cindy  │ sales      │     96 │
+│ Dave or Cindy  │ sales      │     96 │
+└────────────────┴────────────┴────────┘
+┌─name──────────┬─department─┬─salary─┐
+│ Alice         │ sales      │    100 │
+│ Dave or Cindy │ sales      │     96 │
+│ Dave or Cindy │ sales      │     96 │
+│ Grace         │ it         │     90 │
+│ Emma          │ it         │     84 │
+└───────────────┴────────────┴────────┘
+┌─name──────────┬─department─┬─salary─┐
+│ Alice         │ sales      │    100 │
+│ Dave or Cindy │ sales      │     96 │
+│ Dave or Cindy │ sales      │     96 │
+│ Grace         │ it         │     90 │
+│ Emma          │ it         │     84 │
+└───────────────┴────────────┴────────┘
diff --git a/tests/queries/0_stateless/02790_sql_standard_fetch.sql b/tests/queries/0_stateless/02790_sql_standard_fetch.sql
index 07a806eddf9..638cc66682d 100644
--- a/tests/queries/0_stateless/02790_sql_standard_fetch.sql
+++ b/tests/queries/0_stateless/02790_sql_standard_fetch.sql
@@ -6,28 +6,28 @@ INSERT INTO employees VALUES (23, 'Henry', 'it', 104), (24, 'Irene', 'it', 104),
 -- Determinism
 SET max_threads = 1, parallelize_output_from_storages = 0;
 
-select id, transform(name, ['Henry', 'Irene', 'Dave', 'Cindy'], ['Henry or Irene', 'Henry or Irene', 'Dave or Cindy', 'Dave or Cindy']) AS name, department, salary from (SELECT * FROM employees ORDER BY id, name, department, salary)
+select transform(name, ['Henry', 'Irene', 'Dave', 'Cindy'], ['Henry or Irene', 'Henry or Irene', 'Dave or Cindy', 'Dave or Cindy']) AS name, department, salary from (SELECT * FROM employees ORDER BY id, name, department, salary)
 order by salary desc
 limit 5
 format PrettyCompactNoEscapes;
 
-select id, transform(name, ['Henry', 'Irene', 'Dave', 'Cindy'], ['Henry or Irene', 'Henry or Irene', 'Dave or Cindy', 'Dave or Cindy']) AS name, department, salary from (SELECT * FROM employees ORDER BY id, name, department, salary)
+select transform(name, ['Henry', 'Irene', 'Dave', 'Cindy'], ['Henry or Irene', 'Henry or Irene', 'Dave or Cindy', 'Dave or Cindy']) AS name, department, salary from (SELECT * FROM employees ORDER BY id, name, department, salary)
 order by salary desc
 fetch first 5 rows only
 format PrettyCompactNoEscapes;
 
-select id, transform(name, ['Henry', 'Irene', 'Dave', 'Cindy'], ['Henry or Irene', 'Henry or Irene', 'Dave or Cindy', 'Dave or Cindy']) AS name, department, salary from (SELECT * FROM employees ORDER BY id, name, department, salary)
+select transform(name, ['Henry', 'Irene', 'Dave', 'Cindy'], ['Henry or Irene', 'Henry or Irene', 'Dave or Cindy', 'Dave or Cindy']) AS name, department, salary from (SELECT * FROM employees ORDER BY id, name, department, salary)
 order by salary desc
 fetch first 5 rows with ties
 format PrettyCompactNoEscapes;
 
-select id, transform(name, ['Henry', 'Irene', 'Dave', 'Cindy'], ['Henry or Irene', 'Henry or Irene', 'Dave or Cindy', 'Dave or Cindy']) AS name, department, salary from (SELECT * FROM employees ORDER BY id, name, department, salary)
+select transform(name, ['Henry', 'Irene', 'Dave', 'Cindy'], ['Henry or Irene', 'Henry or Irene', 'Dave or Cindy', 'Dave or Cindy']) AS name, department, salary from (SELECT * FROM employees ORDER BY id, name, department, salary)
 order by salary desc
 offset 3 rows
 fetch next 5 rows only
 format PrettyCompactNoEscapes;
 
-select id, transform(name, ['Henry', 'Irene', 'Dave', 'Cindy'], ['Henry or Irene', 'Henry or Irene', 'Dave or Cindy', 'Dave or Cindy']) AS name, department, salary from (SELECT * FROM employees ORDER BY id, name, department, salary)
+select transform(name, ['Henry', 'Irene', 'Dave', 'Cindy'], ['Henry or Irene', 'Henry or Irene', 'Dave or Cindy', 'Dave or Cindy']) AS name, department, salary from (SELECT * FROM employees ORDER BY id, name, department, salary)
 order by salary desc
 offset 3 rows
 fetch first 5 rows only

From 2c6bc318476ce98b916cd2ffb6a9a44f5a5488f8 Mon Sep 17 00:00:00 2001
From: Igor Nikonov <igor@clickhouse.com>
Date: Sat, 22 Jul 2023 18:07:22 +0000
Subject: [PATCH 187/242] Test is not dependent on new analyzer

---
 .../0_stateless/02810_fix_remove_dedundant_distinct_view.sql    | 2 --
 1 file changed, 2 deletions(-)

diff --git a/tests/queries/0_stateless/02810_fix_remove_dedundant_distinct_view.sql b/tests/queries/0_stateless/02810_fix_remove_dedundant_distinct_view.sql
index ca0a2edd99d..10a68721c51 100644
--- a/tests/queries/0_stateless/02810_fix_remove_dedundant_distinct_view.sql
+++ b/tests/queries/0_stateless/02810_fix_remove_dedundant_distinct_view.sql
@@ -1,5 +1,3 @@
-set allow_experimental_analyzer=1;
-
 drop table if exists tab_v;
 drop table if exists tab;
 create table tab (x UInt64, y UInt64) engine MergeTree() order by (x, y);

From d25cd0d0b635196b1a4cb2178d93b7060bf02819 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@clickhouse.com>
Date: Sat, 22 Jul 2023 20:21:33 +0200
Subject: [PATCH 188/242] Partial revert

---
 tests/queries/0_stateless/01187_set_profile_as_setting.sh       | 2 +-
 .../0_stateless/02360_rename_table_along_with_log_name.sh       | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/queries/0_stateless/01187_set_profile_as_setting.sh b/tests/queries/0_stateless/01187_set_profile_as_setting.sh
index fccac57aea8..dacb609d790 100755
--- a/tests/queries/0_stateless/01187_set_profile_as_setting.sh
+++ b/tests/queries/0_stateless/01187_set_profile_as_setting.sh
@@ -4,13 +4,13 @@
 unset CLICKHOUSE_LOG_COMMENT
 
 CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
-CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL=fatal
 # shellcheck source=../shell_config.sh
 . "$CURDIR"/../shell_config.sh
 
 $CLICKHOUSE_CLIENT -n -m -q "select value, changed from system.settings where name='readonly';"
 $CLICKHOUSE_CLIENT -n -m -q "set profile='default'; select value, changed from system.settings where name='readonly';"
 $CLICKHOUSE_CLIENT -n -m -q "set profile='readonly'; select value, changed from system.settings where name='readonly';" 2>&1| grep -Fa "Cannot modify 'send_logs_level' setting in readonly mode" > /dev/null && echo "OK"
+CLICKHOUSE_CLIENT=$(echo ${CLICKHOUSE_CLIENT} | sed 's/'"--send_logs_level=${CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL}"'/--send_logs_level=fatal/g')
 $CLICKHOUSE_CLIENT -n -m -q "set profile='readonly'; select value, changed from system.settings where name='readonly';"
 
 ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&query=select+value,changed+from+system.settings+where+name='readonly'"
diff --git a/tests/queries/0_stateless/02360_rename_table_along_with_log_name.sh b/tests/queries/0_stateless/02360_rename_table_along_with_log_name.sh
index c07dcdd549b..e8c7f844b5c 100755
--- a/tests/queries/0_stateless/02360_rename_table_along_with_log_name.sh
+++ b/tests/queries/0_stateless/02360_rename_table_along_with_log_name.sh
@@ -1,7 +1,6 @@
 #!/usr/bin/env bash
 
 CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
-CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL=trace
 # shellcheck source=../shell_config.sh
 . "$CURDIR"/../shell_config.sh
 
@@ -12,6 +11,7 @@ $CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS y;"
 $CLICKHOUSE_CLIENT -q "CREATE TABLE x(i int) ENGINE MergeTree ORDER BY i;"
 $CLICKHOUSE_CLIENT -q "RENAME TABLE x TO y;"
 
+CLICKHOUSE_CLIENT_WITH_LOG=$(echo ${CLICKHOUSE_CLIENT} | sed 's/'"--send_logs_level=${CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL}"'/--send_logs_level=trace/g')
 regexp="${CLICKHOUSE_DATABASE}\\.x" # Check if there are still log entries with old table name
 $CLICKHOUSE_CLIENT_WITH_LOG --send_logs_source_regexp "$regexp" -q "INSERT INTO y VALUES(1);"
 

From 0b258dda4ee618a4d002e2b5246d68bbd2c77c7e Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Fri, 21 Jul 2023 08:31:45 +0200
Subject: [PATCH 189/242] Reproducible builds for Rust

From now on cargo will not download anything from the internet during
builds. This step has been moved to the docker image build (via cargo
vendor).

And now cargo inside docker.io/clickhouse/binary-builder will not use
any crates from the internet, so we don't need to add --offline for
cargo commands in cmake (corrosion_import_crate()).

Also the docker build command had been adjusted to allow following
symlinks inside build context, by using tar, this is required for Rust
packages.

Note, that to make proper Cargo.lock that could be vendored I did the
following:
- per-project locks had been removed (since there is no automatic way to
  sync the workspace Cargo.lock with per-project Cargo.lock, since cargo
  update/generate-lockfile will use only per-project Cargo.toml files
  apparently, -Z minimal-versions does not help either)
- and to generate Cargo.lock with less changes I've pinned version in
  the Cargo.toml strictly, i.e. not 'foo = "0.1"' but 'foo = "=0.1"'
  then the Cargo.lock for workspace had been generated and afterwards
  I've reverted this part.

Plus I have to update the dependencies afterwards, since otherwise there
are conflicts with dependencies for std library. Non trivial.

Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com>
---
 .gitignore                        |   2 +
 docker/packager/binary/Dockerfile |  27 ++
 docker/packager/binary/rust       |   1 +
 rust/.dockerignore                |   4 +
 rust/.gitignore                   |   4 +
 rust/BLAKE3/Cargo.lock            |  92 -----
 rust/CMakeLists.txt               |   2 +
 rust/{skim => }/Cargo.lock        | 519 +++++++++++++++++++++++++--
 rust/Cargo.toml                   |  12 +
 rust/prql/Cargo.lock              | 569 ------------------------------
 tests/ci/docker_images_check.py   |  33 +-
 tests/ci/docker_test.py           |  12 +-
 12 files changed, 582 insertions(+), 695 deletions(-)
 create mode 120000 docker/packager/binary/rust
 create mode 100644 rust/.dockerignore
 create mode 100644 rust/.gitignore
 delete mode 100644 rust/BLAKE3/Cargo.lock
 rename rust/{skim => }/Cargo.lock (66%)
 create mode 100644 rust/Cargo.toml
 delete mode 100644 rust/prql/Cargo.lock

diff --git a/.gitignore b/.gitignore
index 39d6f3f9fc8..5341f23a94f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -69,6 +69,7 @@ cmake-build-*
 *.pyc
 __pycache__
 *.pytest_cache
+.mypy_cache
 
 test.cpp
 CPackConfig.cmake
@@ -167,3 +168,4 @@ tests/integration/**/_gen
 /rust/**/target
 # It is autogenerated from *.in
 /rust/**/.cargo/config.toml
+/rust/**/vendor
diff --git a/docker/packager/binary/Dockerfile b/docker/packager/binary/Dockerfile
index 897bcd24d04..99e748c41d4 100644
--- a/docker/packager/binary/Dockerfile
+++ b/docker/packager/binary/Dockerfile
@@ -58,6 +58,33 @@ RUN curl https://sh.rustup.rs -sSf | bash -s -- -y && \
     rustup target add aarch64-apple-darwin && \
     rustup target add powerpc64le-unknown-linux-gnu
 
+# Create vendor cache for cargo.
+#
+# Note, that the config.toml for the root is used, you will not be able to
+# install any other crates, except those which had been vendored (since if
+# there is "replace-with" for some source, then cargo will not look to other
+# remotes except this).
+#
+# Notes for the command itself:
+# - --chown is required to preserve the rights
+# - unstable-options for -C
+# - chmod is required to fix the permissions, since builds are running from a different user
+# - copy of the Cargo.lock is required for proper dependencies versions
+# - cargo vendor --sync is requried to overcome [1] bug.
+#
+#   [1]: https://github.com/rust-lang/wg-cargo-std-aware/issues/23
+COPY --chown=root:root /rust /rust/packages
+RUN cargo -Z unstable-options -C /rust/packages vendor > $CARGO_HOME/config.toml && \
+    cp "$(rustc --print=sysroot)"/lib/rustlib/src/rust/Cargo.lock "$(rustc --print=sysroot)"/lib/rustlib/src/rust/library/test/ && \
+    cargo -Z unstable-options -C /rust/packages vendor --sync "$(rustc --print=sysroot)"/lib/rustlib/src/rust/library/test/Cargo.toml && \
+    rm "$(rustc --print=sysroot)"/lib/rustlib/src/rust/library/test/Cargo.lock && \
+    sed -i "s#\"vendor\"#\"/rust/vendor\"#" $CARGO_HOME/config.toml && \
+    cat $CARGO_HOME/config.toml && \
+    mv /rust/packages/vendor /rust/vendor && \
+    chmod -R o=r+X /rust/vendor && \
+    ls -R -l /rust/packages && \
+    rm -r /rust/packages
+
 # NOTE: Seems like gcc-11 is too new for ubuntu20 repository
 # A cross-linker for RISC-V 64 (we need it, because LLVM's LLD does not work):
 RUN add-apt-repository ppa:ubuntu-toolchain-r/test --yes \
diff --git a/docker/packager/binary/rust b/docker/packager/binary/rust
new file mode 120000
index 00000000000..742dc49e9ac
--- /dev/null
+++ b/docker/packager/binary/rust
@@ -0,0 +1 @@
+../../../rust
\ No newline at end of file
diff --git a/rust/.dockerignore b/rust/.dockerignore
new file mode 100644
index 00000000000..6b761aa401c
--- /dev/null
+++ b/rust/.dockerignore
@@ -0,0 +1,4 @@
+# Just in case ignore any cargo stuff (and just in case someone will run this
+# docker build locally with build context using folder root):
+target
+vendor
diff --git a/rust/.gitignore b/rust/.gitignore
new file mode 100644
index 00000000000..f850cd563c9
--- /dev/null
+++ b/rust/.gitignore
@@ -0,0 +1,4 @@
+# This is for tar --exclude-vcs-ignores (and just in case someone will run
+# docker build locally with build context created via tar):
+target
+vendor
diff --git a/rust/BLAKE3/Cargo.lock b/rust/BLAKE3/Cargo.lock
deleted file mode 100644
index 9ac60773732..00000000000
--- a/rust/BLAKE3/Cargo.lock
+++ /dev/null
@@ -1,92 +0,0 @@
-# This file is automatically @generated by Cargo.
-# It is not intended for manual editing.
-version = 3
-
-[[package]]
-name = "_ch_rust_blake3"
-version = "0.1.0"
-dependencies = [
- "blake3",
- "libc",
-]
-
-[[package]]
-name = "arrayref"
-version = "0.3.6"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a4c527152e37cf757a3f78aae5a06fbeefdb07ccc535c980a3208ee3060dd544"
-
-[[package]]
-name = "arrayvec"
-version = "0.7.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8da52d66c7071e2e3fa2a1e5c6d088fec47b593032b254f5e980de8ea54454d6"
-
-[[package]]
-name = "blake3"
-version = "1.2.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "526c210b4520e416420759af363083471656e819a75e831b8d2c9d5a584f2413"
-dependencies = [
- "arrayref",
- "arrayvec",
- "cc",
- "cfg-if",
- "constant_time_eq",
- "digest",
-]
-
-[[package]]
-name = "cc"
-version = "1.0.73"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2fff2a6927b3bb87f9595d67196a70493f627687a71d87a0d692242c33f58c11"
-
-[[package]]
-name = "cfg-if"
-version = "1.0.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
-
-[[package]]
-name = "constant_time_eq"
-version = "0.1.5"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "245097e9a4535ee1e3e3931fcfcd55a796a44c643e8596ff6566d68f09b87bbc"
-
-[[package]]
-name = "digest"
-version = "0.9.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d3dd60d1080a57a05ab032377049e0591415d2b31afd7028356dbf3cc6dcb066"
-dependencies = [
- "generic-array",
-]
-
-[[package]]
-name = "generic-array"
-version = "0.14.6"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bff49e947297f3312447abdca79f45f4738097cc82b06e72054d2223f601f1b9"
-dependencies = [
- "typenum",
- "version_check",
-]
-
-[[package]]
-name = "libc"
-version = "0.2.132"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8371e4e5341c3a96db127eb2465ac681ced4c433e01dd0e938adbef26ba93ba5"
-
-[[package]]
-name = "typenum"
-version = "1.15.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "dcf81ac59edc17cc8697ff311e8f5ef2d99fcbd9817b34cec66f90b6c3dfd987"
-
-[[package]]
-name = "version_check"
-version = "0.9.4"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
diff --git a/rust/CMakeLists.txt b/rust/CMakeLists.txt
index 41451fe0a1e..ca0886cb300 100644
--- a/rust/CMakeLists.txt
+++ b/rust/CMakeLists.txt
@@ -55,6 +55,8 @@ function(clickhouse_import_crate)
         endif()
     endif()
 
+    # Note that --offline is not used here: the vendor archive is used on CI,
+    # and passing --offline here would be inconvenient for local development.
     corrosion_import_crate(NO_STD ${ARGN} PROFILE ${profile})
 endfunction()
 
diff --git a/rust/skim/Cargo.lock b/rust/Cargo.lock
similarity index 66%
rename from rust/skim/Cargo.lock
rename to rust/Cargo.lock
index f55ea8a84b0..07bbf8ba27e 100644
--- a/rust/skim/Cargo.lock
+++ b/rust/Cargo.lock
@@ -2,6 +2,22 @@
 # It is not intended for manual editing.
 version = 3
 
+[[package]]
+name = "_ch_rust_blake3"
+version = "0.1.0"
+dependencies = [
+ "blake3",
+ "libc",
+]
+
+[[package]]
+name = "_ch_rust_prql"
+version = "0.1.0"
+dependencies = [
+ "prql-compiler",
+ "serde_json",
+]
+
 [[package]]
 name = "_ch_rust_skim_rust"
 version = "0.1.0"
@@ -12,6 +28,32 @@ dependencies = [
  "term",
 ]
 
+[[package]]
+name = "addr2line"
+version = "0.20.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f4fa78e18c64fce05e902adecd7a5eed15a5e0a3439f7b0e169f0252214865e3"
+dependencies = [
+ "gimli",
+]
+
+[[package]]
+name = "adler"
+version = "1.0.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe"
+
+[[package]]
+name = "ahash"
+version = "0.7.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fcb51a0695d8f838b1ee009b3fbf66bda078cd64590202a864a8f3e8c4315c47"
+dependencies = [
+ "getrandom",
+ "once_cell",
+ "version_check",
+]
+
 [[package]]
 name = "aho-corasick"
 version = "1.0.2"
@@ -36,6 +78,31 @@ dependencies = [
  "libc",
 ]
 
+[[package]]
+name = "anyhow"
+version = "1.0.72"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3b13c32d80ecc7ab747b80c3784bce54ee8a7a0cc4fbda9bf4cda2cf6fe90854"
+dependencies = [
+ "backtrace",
+]
+
+[[package]]
+name = "ariadne"
+version = "0.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "367fd0ad87307588d087544707bc5fbf4805ded96c7db922b70d368fa1cb5702"
+dependencies = [
+ "unicode-width",
+ "yansi",
+]
+
+[[package]]
+name = "arrayref"
+version = "0.3.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6b4930d2cb77ce62f89ee5d5289b4ac049559b1c45539271f5ed4fdc7db34545"
+
 [[package]]
 name = "arrayvec"
 version = "0.7.4"
@@ -48,6 +115,21 @@ version = "1.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
 
+[[package]]
+name = "backtrace"
+version = "0.3.68"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4319208da049c43661739c5fade2ba182f09d1dc2299b32298d3a31692b17e12"
+dependencies = [
+ "addr2line",
+ "cc",
+ "cfg-if",
+ "libc",
+ "miniz_oxide",
+ "object",
+ "rustc-demangle",
+]
+
 [[package]]
 name = "beef"
 version = "0.5.2"
@@ -60,6 +142,29 @@ version = "1.3.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
 
+[[package]]
+name = "blake3"
+version = "1.4.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "199c42ab6972d92c9f8995f086273d25c42fc0f7b2a1fcefba465c1352d25ba5"
+dependencies = [
+ "arrayref",
+ "arrayvec",
+ "cc",
+ "cfg-if",
+ "constant_time_eq",
+ "digest",
+]
+
+[[package]]
+name = "block-buffer"
+version = "0.10.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71"
+dependencies = [
+ "generic-array",
+]
+
 [[package]]
 name = "bumpalo"
 version = "3.13.0"
@@ -93,6 +198,16 @@ dependencies = [
  "winapi",
 ]
 
+[[package]]
+name = "chumsky"
+version = "0.9.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "23170228b96236b5a7299057ac284a321457700bc8c41a4476052f0f4ba5349d"
+dependencies = [
+ "hashbrown 0.12.3",
+ "stacker",
+]
+
 [[package]]
 name = "codespan-reporting"
 version = "0.11.1"
@@ -103,6 +218,12 @@ dependencies = [
  "unicode-width",
 ]
 
+[[package]]
+name = "constant_time_eq"
+version = "0.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f7144d30dcf0fafbce74250a3963025d8d52177934239851c917d29f1df280c2"
+
 [[package]]
 name = "core-foundation-sys"
 version = "0.8.4"
@@ -177,10 +298,41 @@ dependencies = [
 ]
 
 [[package]]
-name = "cxx"
-version = "1.0.101"
+name = "crypto-common"
+version = "0.1.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5032837c1384de3708043de9d4e97bb91290faca6c16529a28aa340592a78166"
+checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3"
+dependencies = [
+ "generic-array",
+ "typenum",
+]
+
+[[package]]
+name = "csv"
+version = "1.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "626ae34994d3d8d668f4269922248239db4ae42d538b14c398b74a52208e8086"
+dependencies = [
+ "csv-core",
+ "itoa",
+ "ryu",
+ "serde",
+]
+
+[[package]]
+name = "csv-core"
+version = "0.1.10"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2b2466559f260f48ad25fe6317b3c8dac77b5bdb5763ac7d9d6103530663bc90"
+dependencies = [
+ "memchr",
+]
+
+[[package]]
+name = "cxx"
+version = "1.0.102"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f68e12e817cb19eaab81aaec582b4052d07debd3c3c6b083b9d361db47c7dc9d"
 dependencies = [
  "cc",
  "cxxbridge-flags",
@@ -190,9 +342,9 @@ dependencies = [
 
 [[package]]
 name = "cxx-build"
-version = "1.0.101"
+version = "1.0.102"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "51368b3d0dbf356e10fcbfd455a038503a105ee556f7ee79b6bb8c53a7247456"
+checksum = "e789217e4ab7cf8cc9ce82253180a9fe331f35f5d339f0ccfe0270b39433f397"
 dependencies = [
  "cc",
  "codespan-reporting",
@@ -200,24 +352,24 @@ dependencies = [
  "proc-macro2",
  "quote",
  "scratch",
- "syn 2.0.26",
+ "syn 2.0.27",
 ]
 
 [[package]]
 name = "cxxbridge-flags"
-version = "1.0.101"
+version = "1.0.102"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0d9062157072e4aafc8e56ceaf8325ce850c5ae37578c852a0d4de2cecdded13"
+checksum = "78a19f4c80fd9ab6c882286fa865e92e07688f4387370a209508014ead8751d0"
 
 [[package]]
 name = "cxxbridge-macro"
-version = "1.0.101"
+version = "1.0.102"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "cf01e8a540f5a4e0f284595834f81cf88572f244b768f051724537afa99a2545"
+checksum = "b8fcfa71f66c8563c4fa9dd2bb68368d50267856f831ac5d85367e0805f9606c"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.26",
+ "syn 2.0.27",
 ]
 
 [[package]]
@@ -296,6 +448,17 @@ dependencies = [
  "syn 1.0.109",
 ]
 
+[[package]]
+name = "digest"
+version = "0.10.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292"
+dependencies = [
+ "block-buffer",
+ "crypto-common",
+ "subtle",
+]
+
 [[package]]
 name = "dirs-next"
 version = "2.0.0"
@@ -319,9 +482,27 @@ dependencies = [
 
 [[package]]
 name = "either"
-version = "1.8.1"
+version = "1.9.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7fcaabb2fef8c910e7f4c7ce9f67a1283a1715879a7c230ca9d6d1ae31f16d91"
+checksum = "a26ae43d7bcc3b814de94796a5e736d4029efb0ee900c12e2d54c993ad1a1e07"
+
+[[package]]
+name = "enum-as-inner"
+version = "0.5.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c9720bba047d567ffc8a3cba48bf19126600e249ab7f128e9233e6376976a116"
+dependencies = [
+ "heck",
+ "proc-macro2",
+ "quote",
+ "syn 1.0.109",
+]
+
+[[package]]
+name = "equivalent"
+version = "1.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5"
 
 [[package]]
 name = "fnv"
@@ -338,6 +519,16 @@ dependencies = [
  "thread_local",
 ]
 
+[[package]]
+name = "generic-array"
+version = "0.14.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a"
+dependencies = [
+ "typenum",
+ "version_check",
+]
+
 [[package]]
 name = "getrandom"
 version = "0.2.10"
@@ -349,6 +540,33 @@ dependencies = [
  "wasi 0.11.0+wasi-snapshot-preview1",
 ]
 
+[[package]]
+name = "gimli"
+version = "0.27.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b6c80984affa11d98d1b88b66ac8853f143217b399d3c74116778ff8fdb4ed2e"
+
+[[package]]
+name = "hashbrown"
+version = "0.12.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888"
+dependencies = [
+ "ahash",
+]
+
+[[package]]
+name = "hashbrown"
+version = "0.14.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2c6201b9ff9fd90a5a3bac2e56a830d0caa509576f0e503818ee82c181b3437a"
+
+[[package]]
+name = "heck"
+version = "0.4.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8"
+
 [[package]]
 name = "hermit-abi"
 version = "0.3.2"
@@ -384,6 +602,31 @@ version = "1.0.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39"
 
+[[package]]
+name = "indexmap"
+version = "2.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d5477fe2230a79769d8dc68e0eabf5437907c0457a5614a9e8dddb67f65eb65d"
+dependencies = [
+ "equivalent",
+ "hashbrown 0.14.0",
+]
+
+[[package]]
+name = "itertools"
+version = "0.10.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473"
+dependencies = [
+ "either",
+]
+
+[[package]]
+name = "itoa"
+version = "1.0.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "af150ab688ff2122fcef229be89cb50dd66af9e01a4ff320cc137eecc9bacc38"
+
 [[package]]
 name = "js-sys"
 version = "0.3.64"
@@ -444,6 +687,21 @@ dependencies = [
  "autocfg",
 ]
 
+[[package]]
+name = "minimal-lexical"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
+
+[[package]]
+name = "miniz_oxide"
+version = "0.7.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e7810e0be55b428ada41041c41f32c9f1a42817901b4ccf45fa3d4b6561e74c7"
+dependencies = [
+ "adler",
+]
+
 [[package]]
 name = "nix"
 version = "0.24.3"
@@ -470,10 +728,20 @@ dependencies = [
 ]
 
 [[package]]
-name = "num-traits"
-version = "0.2.15"
+name = "nom"
+version = "7.1.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "578ede34cf02f8924ab9447f50c28075b4d3e5b269972345e7e0372b38c6cdcd"
+checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a"
+dependencies = [
+ "memchr",
+ "minimal-lexical",
+]
+
+[[package]]
+name = "num-traits"
+version = "0.2.16"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f30b0abd723be7e2ffca1272140fac1a2f084c77ec3e123c192b66af1ee9e6c2"
 dependencies = [
  "autocfg",
 ]
@@ -488,6 +756,15 @@ dependencies = [
  "libc",
 ]
 
+[[package]]
+name = "object"
+version = "0.31.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8bda667d9f2b5051b8833f59f3bf748b28ef54f850f4fcb389a252aa383866d1"
+dependencies = [
+ "memchr",
+]
+
 [[package]]
 name = "once_cell"
 version = "1.18.0"
@@ -509,6 +786,41 @@ dependencies = [
  "unicode-ident",
 ]
 
+[[package]]
+name = "prql-compiler"
+version = "0.8.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c99b52154002ac7f286dd2293c2f8d4e30526c1d396b14deef5ada1deef3c9ff"
+dependencies = [
+ "anyhow",
+ "ariadne",
+ "chumsky",
+ "csv",
+ "enum-as-inner",
+ "itertools",
+ "lazy_static",
+ "log",
+ "once_cell",
+ "regex",
+ "semver",
+ "serde",
+ "serde_json",
+ "serde_yaml",
+ "sqlformat",
+ "sqlparser",
+ "strum",
+ "strum_macros",
+]
+
+[[package]]
+name = "psm"
+version = "0.1.21"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5787f7cda34e3033a72192c018bc5883100330f362ef279a8cbccfce8bb4e874"
+dependencies = [
+ "cc",
+]
+
 [[package]]
 name = "quote"
 version = "1.0.31"
@@ -589,12 +901,24 @@ version = "0.7.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "e5ea92a5b6195c6ef2a0295ea818b312502c6fc94dde986c5553242e18fd4ce2"
 
+[[package]]
+name = "rustc-demangle"
+version = "0.1.23"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d626bb9dae77e28219937af045c257c28bfd3f69333c512553507f5f9798cb76"
+
 [[package]]
 name = "rustversion"
 version = "1.0.14"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "7ffc183a10b4478d04cbbbfc96d0873219d962dd5accaff2ffbd4ceb7df837f4"
 
+[[package]]
+name = "ryu"
+version = "1.0.15"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1ad4cc8da4ef723ed60bced201181d83791ad433213d8c24efffda1eec85d741"
+
 [[package]]
 name = "scopeguard"
 version = "1.2.0"
@@ -608,10 +932,57 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "a3cf7c11c38cb994f3d40e8a8cde3bbd1f72a435e4c49e85d6553d8312306152"
 
 [[package]]
-name = "serde"
-version = "1.0.171"
+name = "semver"
+version = "1.0.18"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "30e27d1e4fd7659406c492fd6cfaf2066ba8773de45ca75e855590f856dc34a9"
+checksum = "b0293b4b29daaf487284529cc2f5675b8e57c61f70167ba415a463651fd6a918"
+dependencies = [
+ "serde",
+]
+
+[[package]]
+name = "serde"
+version = "1.0.174"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3b88756493a5bd5e5395d53baa70b194b05764ab85b59e43e4b8f4e1192fa9b1"
+dependencies = [
+ "serde_derive",
+]
+
+[[package]]
+name = "serde_derive"
+version = "1.0.174"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6e5c3a298c7f978e53536f95a63bdc4c4a64550582f31a0359a9afda6aede62e"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.27",
+]
+
+[[package]]
+name = "serde_json"
+version = "1.0.103"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d03b412469450d4404fe8499a268edd7f8b79fecb074b0d812ad64ca21f4031b"
+dependencies = [
+ "itoa",
+ "ryu",
+ "serde",
+]
+
+[[package]]
+name = "serde_yaml"
+version = "0.9.25"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1a49e178e4452f45cb61d0cd8cebc1b0fafd3e41929e996cef79aa3aca91f574"
+dependencies = [
+ "indexmap",
+ "itoa",
+ "ryu",
+ "serde",
+ "unsafe-libyaml",
+]
 
 [[package]]
 name = "skim"
@@ -638,12 +1009,74 @@ dependencies = [
  "vte",
 ]
 
+[[package]]
+name = "sqlformat"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0c12bc9199d1db8234678b7051747c07f517cdcf019262d1847b94ec8b1aee3e"
+dependencies = [
+ "itertools",
+ "nom",
+ "unicode_categories",
+]
+
+[[package]]
+name = "sqlparser"
+version = "0.33.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "355dc4d4b6207ca8a3434fc587db0a8016130a574dbcdbfb93d7f7b5bc5b211a"
+dependencies = [
+ "log",
+ "serde",
+]
+
+[[package]]
+name = "stacker"
+version = "0.1.15"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c886bd4480155fd3ef527d45e9ac8dd7118a898a46530b7b94c3e21866259fce"
+dependencies = [
+ "cc",
+ "cfg-if",
+ "libc",
+ "psm",
+ "winapi",
+]
+
 [[package]]
 name = "strsim"
 version = "0.10.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623"
 
+[[package]]
+name = "strum"
+version = "0.24.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "063e6045c0e62079840579a7e47a355ae92f60eb74daaf156fb1e84ba164e63f"
+dependencies = [
+ "strum_macros",
+]
+
+[[package]]
+name = "strum_macros"
+version = "0.24.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1e385be0d24f186b4ce2f9982191e7101bb737312ad61c1f2f984f34bcf85d59"
+dependencies = [
+ "heck",
+ "proc-macro2",
+ "quote",
+ "rustversion",
+ "syn 1.0.109",
+]
+
+[[package]]
+name = "subtle"
+version = "2.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "81cdd64d312baedb58e21336b31bc043b77e01cc99033ce76ef539f78e965ebc"
+
 [[package]]
 name = "syn"
 version = "1.0.109"
@@ -657,9 +1090,9 @@ dependencies = [
 
 [[package]]
 name = "syn"
-version = "2.0.26"
+version = "2.0.27"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "45c3457aacde3c65315de5031ec191ce46604304d2446e803d71ade03308d970"
+checksum = "b60f673f44a8255b9c8c657daf66a596d435f2da81a555b06dc644d080ba45e0"
 dependencies = [
  "proc-macro2",
  "quote",
@@ -688,22 +1121,22 @@ dependencies = [
 
 [[package]]
 name = "thiserror"
-version = "1.0.43"
+version = "1.0.44"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a35fc5b8971143ca348fa6df4f024d4d55264f3468c71ad1c2f365b0a4d58c42"
+checksum = "611040a08a0439f8248d1990b111c95baa9c704c805fa1f62104b39655fd7f90"
 dependencies = [
  "thiserror-impl",
 ]
 
 [[package]]
 name = "thiserror-impl"
-version = "1.0.43"
+version = "1.0.44"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "463fe12d7993d3b327787537ce8dd4dfa058de32fc2b195ef3cde03dc4771e8f"
+checksum = "090198534930841fab3a5d1bb637cde49e339654e606195f8d9c76eeb081dc96"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.26",
+ "syn 2.0.27",
 ]
 
 [[package]]
@@ -766,6 +1199,12 @@ dependencies = [
  "unicode-width",
 ]
 
+[[package]]
+name = "typenum"
+version = "1.16.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "497961ef93d974e23eb6f433eb5fe1b7930b659f06d12dec6fc44a8f554c0bba"
+
 [[package]]
 name = "unicode-ident"
 version = "1.0.11"
@@ -778,12 +1217,30 @@ version = "0.1.10"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "c0edd1e5b14653f783770bce4a4dabb4a5108a5370a5f5d8cfe8710c361f6c8b"
 
+[[package]]
+name = "unicode_categories"
+version = "0.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "39ec24b3121d976906ece63c9daad25b85969647682eee313cb5779fdd69e14e"
+
+[[package]]
+name = "unsafe-libyaml"
+version = "0.2.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f28467d3e1d3c6586d8f25fa243f544f5800fec42d97032474e17222c2b75cfa"
+
 [[package]]
 name = "utf8parse"
 version = "0.2.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a"
 
+[[package]]
+name = "version_check"
+version = "0.9.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
+
 [[package]]
 name = "vte"
 version = "0.11.1"
@@ -838,7 +1295,7 @@ dependencies = [
  "once_cell",
  "proc-macro2",
  "quote",
- "syn 2.0.26",
+ "syn 2.0.27",
  "wasm-bindgen-shared",
 ]
 
@@ -860,7 +1317,7 @@ checksum = "54681b18a46765f095758388f2d0cf16eb8d4169b639ab575a8f5693af210c7b"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.26",
+ "syn 2.0.27",
  "wasm-bindgen-backend",
  "wasm-bindgen-shared",
 ]
@@ -967,3 +1424,9 @@ name = "windows_x86_64_msvc"
 version = "0.48.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "1a515f5799fe4961cb532f983ce2b23082366b898e52ffbce459c86f67c8378a"
+
+[[package]]
+name = "yansi"
+version = "0.5.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "09041cd90cf85f7f8b2df60c646f853b7f535ce68f85244eb6731cf89fa498ec"
diff --git a/rust/Cargo.toml b/rust/Cargo.toml
new file mode 100644
index 00000000000..2a2b582cea8
--- /dev/null
+++ b/rust/Cargo.toml
@@ -0,0 +1,12 @@
+# workspace is required to vendor crates for all packages.
+[workspace]
+members = [
+    "BLAKE3",
+    "skim",
+    "prql",
+]
+resolver = "2"
+
+# FIXME: even though the profiles should be defined in the main cargo config, we
+# cannot do this yet, since we compile each package separately, so you should
+# ignore the warning from cargo about this.
diff --git a/rust/prql/Cargo.lock b/rust/prql/Cargo.lock
deleted file mode 100644
index da94e4ca852..00000000000
--- a/rust/prql/Cargo.lock
+++ /dev/null
@@ -1,569 +0,0 @@
-# This file is automatically @generated by Cargo.
-# It is not intended for manual editing.
-version = 3
-
-[[package]]
-name = "_ch_rust_prql"
-version = "0.1.0"
-dependencies = [
- "prql-compiler",
- "serde_json",
-]
-
-[[package]]
-name = "addr2line"
-version = "0.20.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f4fa78e18c64fce05e902adecd7a5eed15a5e0a3439f7b0e169f0252214865e3"
-dependencies = [
- "gimli",
-]
-
-[[package]]
-name = "adler"
-version = "1.0.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe"
-
-[[package]]
-name = "ahash"
-version = "0.7.6"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "fcb51a0695d8f838b1ee009b3fbf66bda078cd64590202a864a8f3e8c4315c47"
-dependencies = [
- "getrandom",
- "once_cell",
- "version_check",
-]
-
-[[package]]
-name = "aho-corasick"
-version = "1.0.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "43f6cb1bf222025340178f382c426f13757b2960e89779dfcb319c32542a5a41"
-dependencies = [
- "memchr",
-]
-
-[[package]]
-name = "anyhow"
-version = "1.0.71"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9c7d0618f0e0b7e8ff11427422b64564d5fb0be1940354bfe2e0529b18a9d9b8"
-dependencies = [
- "backtrace",
-]
-
-[[package]]
-name = "ariadne"
-version = "0.2.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "367fd0ad87307588d087544707bc5fbf4805ded96c7db922b70d368fa1cb5702"
-dependencies = [
- "unicode-width",
- "yansi",
-]
-
-[[package]]
-name = "backtrace"
-version = "0.3.68"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4319208da049c43661739c5fade2ba182f09d1dc2299b32298d3a31692b17e12"
-dependencies = [
- "addr2line",
- "cc",
- "cfg-if",
- "libc",
- "miniz_oxide",
- "object",
- "rustc-demangle",
-]
-
-[[package]]
-name = "cc"
-version = "1.0.79"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "50d30906286121d95be3d479533b458f87493b30a4b5f79a607db8f5d11aa91f"
-
-[[package]]
-name = "cfg-if"
-version = "1.0.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
-
-[[package]]
-name = "chumsky"
-version = "0.9.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "23170228b96236b5a7299057ac284a321457700bc8c41a4476052f0f4ba5349d"
-dependencies = [
- "hashbrown 0.12.3",
- "stacker",
-]
-
-[[package]]
-name = "csv"
-version = "1.2.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "626ae34994d3d8d668f4269922248239db4ae42d538b14c398b74a52208e8086"
-dependencies = [
- "csv-core",
- "itoa",
- "ryu",
- "serde",
-]
-
-[[package]]
-name = "csv-core"
-version = "0.1.10"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2b2466559f260f48ad25fe6317b3c8dac77b5bdb5763ac7d9d6103530663bc90"
-dependencies = [
- "memchr",
-]
-
-[[package]]
-name = "either"
-version = "1.8.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7fcaabb2fef8c910e7f4c7ce9f67a1283a1715879a7c230ca9d6d1ae31f16d91"
-
-[[package]]
-name = "enum-as-inner"
-version = "0.5.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c9720bba047d567ffc8a3cba48bf19126600e249ab7f128e9233e6376976a116"
-dependencies = [
- "heck",
- "proc-macro2",
- "quote",
- "syn 1.0.109",
-]
-
-[[package]]
-name = "equivalent"
-version = "1.0.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "88bffebc5d80432c9b140ee17875ff173a8ab62faad5b257da912bd2f6c1c0a1"
-
-[[package]]
-name = "getrandom"
-version = "0.2.10"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "be4136b2a15dd319360be1c07d9933517ccf0be8f16bf62a3bee4f0d618df427"
-dependencies = [
- "cfg-if",
- "libc",
- "wasi",
-]
-
-[[package]]
-name = "gimli"
-version = "0.27.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b6c80984affa11d98d1b88b66ac8853f143217b399d3c74116778ff8fdb4ed2e"
-
-[[package]]
-name = "hashbrown"
-version = "0.12.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888"
-dependencies = [
- "ahash",
-]
-
-[[package]]
-name = "hashbrown"
-version = "0.14.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2c6201b9ff9fd90a5a3bac2e56a830d0caa509576f0e503818ee82c181b3437a"
-
-[[package]]
-name = "heck"
-version = "0.4.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8"
-
-[[package]]
-name = "indexmap"
-version = "2.0.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d5477fe2230a79769d8dc68e0eabf5437907c0457a5614a9e8dddb67f65eb65d"
-dependencies = [
- "equivalent",
- "hashbrown 0.14.0",
-]
-
-[[package]]
-name = "itertools"
-version = "0.10.5"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473"
-dependencies = [
- "either",
-]
-
-[[package]]
-name = "itoa"
-version = "1.0.8"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "62b02a5381cc465bd3041d84623d0fa3b66738b52b8e2fc3bab8ad63ab032f4a"
-
-[[package]]
-name = "lazy_static"
-version = "1.4.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
-
-[[package]]
-name = "libc"
-version = "0.2.147"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b4668fb0ea861c1df094127ac5f1da3409a82116a4ba74fca2e58ef927159bb3"
-
-[[package]]
-name = "log"
-version = "0.4.19"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b06a4cde4c0f271a446782e3eff8de789548ce57dbc8eca9292c27f4a42004b4"
-
-[[package]]
-name = "memchr"
-version = "2.5.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d"
-
-[[package]]
-name = "minimal-lexical"
-version = "0.2.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
-
-[[package]]
-name = "miniz_oxide"
-version = "0.7.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e7810e0be55b428ada41041c41f32c9f1a42817901b4ccf45fa3d4b6561e74c7"
-dependencies = [
- "adler",
-]
-
-[[package]]
-name = "nom"
-version = "7.1.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a"
-dependencies = [
- "memchr",
- "minimal-lexical",
-]
-
-[[package]]
-name = "object"
-version = "0.31.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8bda667d9f2b5051b8833f59f3bf748b28ef54f850f4fcb389a252aa383866d1"
-dependencies = [
- "memchr",
-]
-
-[[package]]
-name = "once_cell"
-version = "1.18.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d"
-
-[[package]]
-name = "proc-macro2"
-version = "1.0.63"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7b368fba921b0dce7e60f5e04ec15e565b3303972b42bcfde1d0713b881959eb"
-dependencies = [
- "unicode-ident",
-]
-
-[[package]]
-name = "prql-compiler"
-version = "0.8.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c99b52154002ac7f286dd2293c2f8d4e30526c1d396b14deef5ada1deef3c9ff"
-dependencies = [
- "anyhow",
- "ariadne",
- "chumsky",
- "csv",
- "enum-as-inner",
- "itertools",
- "lazy_static",
- "log",
- "once_cell",
- "regex",
- "semver",
- "serde",
- "serde_json",
- "serde_yaml",
- "sqlformat",
- "sqlparser",
- "strum",
- "strum_macros",
-]
-
-[[package]]
-name = "psm"
-version = "0.1.21"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5787f7cda34e3033a72192c018bc5883100330f362ef279a8cbccfce8bb4e874"
-dependencies = [
- "cc",
-]
-
-[[package]]
-name = "quote"
-version = "1.0.29"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "573015e8ab27661678357f27dc26460738fd2b6c86e46f386fde94cb5d913105"
-dependencies = [
- "proc-macro2",
-]
-
-[[package]]
-name = "regex"
-version = "1.9.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "89089e897c013b3deb627116ae56a6955a72b8bed395c9526af31c9fe528b484"
-dependencies = [
- "aho-corasick",
- "memchr",
- "regex-automata",
- "regex-syntax",
-]
-
-[[package]]
-name = "regex-automata"
-version = "0.3.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "fa250384981ea14565685dea16a9ccc4d1c541a13f82b9c168572264d1df8c56"
-dependencies = [
- "aho-corasick",
- "memchr",
- "regex-syntax",
-]
-
-[[package]]
-name = "regex-syntax"
-version = "0.7.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2ab07dc67230e4a4718e70fd5c20055a4334b121f1f9db8fe63ef39ce9b8c846"
-
-[[package]]
-name = "rustc-demangle"
-version = "0.1.23"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d626bb9dae77e28219937af045c257c28bfd3f69333c512553507f5f9798cb76"
-
-[[package]]
-name = "rustversion"
-version = "1.0.13"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "dc31bd9b61a32c31f9650d18add92aa83a49ba979c143eefd27fe7177b05bd5f"
-
-[[package]]
-name = "ryu"
-version = "1.0.14"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "fe232bdf6be8c8de797b22184ee71118d63780ea42ac85b61d1baa6d3b782ae9"
-
-[[package]]
-name = "semver"
-version = "1.0.17"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bebd363326d05ec3e2f532ab7660680f3b02130d780c299bca73469d521bc0ed"
-dependencies = [
- "serde",
-]
-
-[[package]]
-name = "serde"
-version = "1.0.166"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d01b7404f9d441d3ad40e6a636a7782c377d2abdbe4fa2440e2edcc2f4f10db8"
-dependencies = [
- "serde_derive",
-]
-
-[[package]]
-name = "serde_derive"
-version = "1.0.166"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5dd83d6dde2b6b2d466e14d9d1acce8816dedee94f735eac6395808b3483c6d6"
-dependencies = [
- "proc-macro2",
- "quote",
- "syn 2.0.23",
-]
-
-[[package]]
-name = "serde_json"
-version = "1.0.100"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0f1e14e89be7aa4c4b78bdbdc9eb5bf8517829a600ae8eaa39a6e1d960b5185c"
-dependencies = [
- "itoa",
- "ryu",
- "serde",
-]
-
-[[package]]
-name = "serde_yaml"
-version = "0.9.22"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "452e67b9c20c37fa79df53201dc03839651086ed9bbe92b3ca585ca9fdaa7d85"
-dependencies = [
- "indexmap",
- "itoa",
- "ryu",
- "serde",
- "unsafe-libyaml",
-]
-
-[[package]]
-name = "sqlformat"
-version = "0.2.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0c12bc9199d1db8234678b7051747c07f517cdcf019262d1847b94ec8b1aee3e"
-dependencies = [
- "itertools",
- "nom",
- "unicode_categories",
-]
-
-[[package]]
-name = "sqlparser"
-version = "0.33.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "355dc4d4b6207ca8a3434fc587db0a8016130a574dbcdbfb93d7f7b5bc5b211a"
-dependencies = [
- "log",
- "serde",
-]
-
-[[package]]
-name = "stacker"
-version = "0.1.15"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c886bd4480155fd3ef527d45e9ac8dd7118a898a46530b7b94c3e21866259fce"
-dependencies = [
- "cc",
- "cfg-if",
- "libc",
- "psm",
- "winapi",
-]
-
-[[package]]
-name = "strum"
-version = "0.24.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "063e6045c0e62079840579a7e47a355ae92f60eb74daaf156fb1e84ba164e63f"
-dependencies = [
- "strum_macros",
-]
-
-[[package]]
-name = "strum_macros"
-version = "0.24.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1e385be0d24f186b4ce2f9982191e7101bb737312ad61c1f2f984f34bcf85d59"
-dependencies = [
- "heck",
- "proc-macro2",
- "quote",
- "rustversion",
- "syn 1.0.109",
-]
-
-[[package]]
-name = "syn"
-version = "1.0.109"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237"
-dependencies = [
- "proc-macro2",
- "quote",
- "unicode-ident",
-]
-
-[[package]]
-name = "syn"
-version = "2.0.23"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "59fb7d6d8281a51045d62b8eb3a7d1ce347b76f312af50cd3dc0af39c87c1737"
-dependencies = [
- "proc-macro2",
- "quote",
- "unicode-ident",
-]
-
-[[package]]
-name = "unicode-ident"
-version = "1.0.10"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "22049a19f4a68748a168c0fc439f9516686aa045927ff767eca0a85101fb6e73"
-
-[[package]]
-name = "unicode-width"
-version = "0.1.10"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c0edd1e5b14653f783770bce4a4dabb4a5108a5370a5f5d8cfe8710c361f6c8b"
-
-[[package]]
-name = "unicode_categories"
-version = "0.1.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "39ec24b3121d976906ece63c9daad25b85969647682eee313cb5779fdd69e14e"
-
-[[package]]
-name = "unsafe-libyaml"
-version = "0.2.8"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1865806a559042e51ab5414598446a5871b561d21b6764f2eabb0dd481d880a6"
-
-[[package]]
-name = "version_check"
-version = "0.9.4"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
-
-[[package]]
-name = "wasi"
-version = "0.11.0+wasi-snapshot-preview1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
-
-[[package]]
-name = "winapi"
-version = "0.3.9"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
-dependencies = [
- "winapi-i686-pc-windows-gnu",
- "winapi-x86_64-pc-windows-gnu",
-]
-
-[[package]]
-name = "winapi-i686-pc-windows-gnu"
-version = "0.4.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
-
-[[package]]
-name = "winapi-x86_64-pc-windows-gnu"
-version = "0.4.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
-
-[[package]]
-name = "yansi"
-version = "0.5.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "09041cd90cf85f7f8b2df60c646f853b7f535ce68f85244eb6731cf89fa498ec"
diff --git a/tests/ci/docker_images_check.py b/tests/ci/docker_images_check.py
index 16a58a90dcf..fff2975cea4 100644
--- a/tests/ci/docker_images_check.py
+++ b/tests/ci/docker_images_check.py
@@ -8,6 +8,7 @@ import shutil
 import subprocess
 import time
 import sys
+from glob import glob
 from pathlib import Path
 from typing import Any, Dict, List, Optional, Set, Tuple, Union
 
@@ -31,6 +32,17 @@ TEMP_PATH = os.path.join(RUNNER_TEMP, "docker_images_check")
 ImagesDict = Dict[str, dict]
 
 
+# workaround for mypy issue [1]:
+#
+#    "Argument 1 to "map" has incompatible type overloaded function" [1]
+#
+#  [1]: https://github.com/python/mypy/issues/9864
+#
+# NOTE: a simple lambda would do the trick as well, but pylint would not like it
+def realpath(*args, **kwargs):
+    return os.path.realpath(*args, **kwargs)
+
+
 class DockerImage:
     def __init__(
         self,
@@ -111,8 +123,23 @@ def get_changed_docker_images(
     changed_images = []
 
     for dockerfile_dir, image_description in images_dict.items():
+        source_dir = GITHUB_WORKSPACE.rstrip("/") + "/"
+        dockerfile_files = glob(f"{source_dir}/{dockerfile_dir}/**", recursive=True)
+        # resolve symlinks
+        dockerfile_files = list(map(realpath, dockerfile_files))
+        # trim prefix to get relative path again, to match with files_changed
+        dockerfile_files = list(map(lambda x: x[len(source_dir) :], dockerfile_files))
+        logging.info(
+            "Docker %s (source_dir=%s) build context for PR %s @ %s: %s",
+            dockerfile_dir,
+            source_dir,
+            pr_info.number,
+            pr_info.sha,
+            str(dockerfile_files),
+        )
+
         for f in files_changed:
-            if f.startswith(dockerfile_dir):
+            if f in dockerfile_files:
                 name = image_description["name"]
                 only_amd64 = image_description.get("only_amd64", False)
                 logging.info(
@@ -245,6 +272,8 @@ def build_and_push_one_image(
         cache_from = f"{cache_from} --cache-from type=registry,ref={image.repo}:{tag}"
 
     cmd = (
+        # tar is required to follow symlinks, since docker-build cannot do this
+        f"tar -v --exclude-vcs-ignores --show-transformed-names --transform 's#{image.full_path.lstrip('/')}#./#' --dereference --create {image.full_path} | "
         "docker buildx build --builder default "
         f"--label build-url={GITHUB_RUN_URL} "
         f"{from_tag_arg}"
@@ -254,7 +283,7 @@ def build_and_push_one_image(
         f"{cache_from} "
         f"--cache-to type=inline,mode=max "
         f"{push_arg}"
-        f"--progress plain {image.full_path}"
+        f"--progress plain -"
     )
     logging.info("Docker command to run: %s", cmd)
     with TeePopen(cmd, build_log) as proc:
diff --git a/tests/ci/docker_test.py b/tests/ci/docker_test.py
index d5d27f73694..c679ab984ee 100644
--- a/tests/ci/docker_test.py
+++ b/tests/ci/docker_test.py
@@ -126,12 +126,13 @@ class TestDockerImageCheck(unittest.TestCase):
         mock_popen.assert_called_once()
         mock_machine.assert_not_called()
         self.assertIn(
+            "tar -v --exclude-vcs-ignores --show-transformed-names --transform 's#path#./#' --dereference --create path | "
             f"docker buildx build --builder default --label build-url={GITHUB_RUN_URL} "
             "--build-arg FROM_TAG=version "
             f"--build-arg CACHE_INVALIDATOR={GITHUB_RUN_URL} "
             "--tag name:version --cache-from type=registry,ref=name:version "
             "--cache-from type=registry,ref=name:latest "
-            "--cache-to type=inline,mode=max --push --progress plain path",
+            "--cache-to type=inline,mode=max --push --progress plain -",
             mock_popen.call_args.args,
         )
         self.assertTrue(result)
@@ -143,12 +144,13 @@ class TestDockerImageCheck(unittest.TestCase):
         mock_popen.assert_called_once()
         mock_machine.assert_not_called()
         self.assertIn(
+            "tar -v --exclude-vcs-ignores --show-transformed-names --transform 's#path#./#' --dereference --create path | "
             f"docker buildx build --builder default --label build-url={GITHUB_RUN_URL} "
             "--build-arg FROM_TAG=version2 "
             f"--build-arg CACHE_INVALIDATOR={GITHUB_RUN_URL} "
             "--tag name:version2 --cache-from type=registry,ref=name:version2 "
             "--cache-from type=registry,ref=name:latest "
-            "--cache-to type=inline,mode=max --progress plain path",
+            "--cache-to type=inline,mode=max --progress plain -",
             mock_popen.call_args.args,
         )
         self.assertTrue(result)
@@ -160,11 +162,12 @@ class TestDockerImageCheck(unittest.TestCase):
         mock_popen.assert_called_once()
         mock_machine.assert_not_called()
         self.assertIn(
+            "tar -v --exclude-vcs-ignores --show-transformed-names --transform 's#path#./#' --dereference --create path | "
             f"docker buildx build --builder default --label build-url={GITHUB_RUN_URL} "
             f"--build-arg CACHE_INVALIDATOR={GITHUB_RUN_URL} "
             "--tag name:version2 --cache-from type=registry,ref=name:version2 "
             "--cache-from type=registry,ref=name:latest "
-            "--cache-to type=inline,mode=max --progress plain path",
+            "--cache-to type=inline,mode=max --progress plain -",
             mock_popen.call_args.args,
         )
         self.assertFalse(result)
@@ -178,13 +181,14 @@ class TestDockerImageCheck(unittest.TestCase):
         mock_popen.assert_called_once()
         mock_machine.assert_not_called()
         self.assertIn(
+            "tar -v --exclude-vcs-ignores --show-transformed-names --transform 's#path#./#' --dereference --create path | "
             f"docker buildx build --builder default --label build-url={GITHUB_RUN_URL} "
             f"--build-arg CACHE_INVALIDATOR={GITHUB_RUN_URL} "
             "--tag name:version2 --cache-from type=registry,ref=name:version2 "
             "--cache-from type=registry,ref=name:latest "
             "--cache-from type=registry,ref=name:cached-version "
             "--cache-from type=registry,ref=name:another-cached "
-            "--cache-to type=inline,mode=max --progress plain path",
+            "--cache-to type=inline,mode=max --progress plain -",
             mock_popen.call_args.args,
         )
         self.assertFalse(result)

From ef0dca626142322fa5420eea8fab491bb53c4ac2 Mon Sep 17 00:00:00 2001
From: Han Fei <hanfei19910905@gmail.com>
Date: Sun, 23 Jul 2023 00:37:34 +0200
Subject: [PATCH 190/242] fix style

---
 src/Common/OptimizedRegularExpression.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Common/OptimizedRegularExpression.cpp b/src/Common/OptimizedRegularExpression.cpp
index 918ebd75fc0..e636b0b987d 100644
--- a/src/Common/OptimizedRegularExpression.cpp
+++ b/src/Common/OptimizedRegularExpression.cpp
@@ -433,7 +433,7 @@ try
     for (auto & lit : alternative_literals)
         alternatives.push_back(std::move(lit.literal));
 }
-catch(...)
+catch (...)
 {
     required_substring = "";
     is_trivial = false;

From 00d6f2ee08a3e442363a078b322adab7b6988f91 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@clickhouse.com>
Date: Sun, 23 Jul 2023 04:56:47 +0200
Subject: [PATCH 191/242] Use incbin for resources, part 1

---
 .gitmodules                                   |   3 +
 contrib/CMakeLists.txt                        |   2 +-
 contrib/incbin                                |   1 +
 contrib/incbin-cmake/CMakeLists.txt           |   4 +
 contrib/nlp-data-cmake/CMakeLists.txt         |  15 --
 programs/install/CMakeLists.txt               |   3 +
 programs/install/Install.cpp                  |  13 +-
 programs/keeper/CMakeLists.txt                |  15 --
 programs/keeper/Keeper.cpp                    |   6 +-
 programs/server/CMakeLists.txt                |  12 +-
 programs/server/Server.cpp                    |  11 +-
 programs/server/resources.cpp                 |   0
 src/CMakeLists.txt                            |   6 +-
 src/Common/CMakeLists.txt                     |   2 +-
 src/Common/Config/ConfigProcessor.cpp         |  33 ++--
 src/Common/Config/ConfigProcessor.h           |   3 +
 src/Common/FrequencyHolder.cpp                | 181 ++++++++++++++++++
 src/Common/FrequencyHolder.h                  | 170 +---------------
 src/Daemon/BaseDaemon.cpp                     |   1 -
 src/Server/WebUIRequestHandler.cpp            |  14 +-
 src/Storages/System/CMakeLists.txt            |  12 +-
 .../System/attachInformationSchemaTables.cpp  |  24 ++-
 22 files changed, 268 insertions(+), 263 deletions(-)
 create mode 160000 contrib/incbin
 create mode 100644 contrib/incbin-cmake/CMakeLists.txt
 delete mode 100644 contrib/nlp-data-cmake/CMakeLists.txt
 create mode 100644 programs/server/resources.cpp
 create mode 100644 src/Common/FrequencyHolder.cpp

diff --git a/.gitmodules b/.gitmodules
index ba71a8ae3a7..30085fb8dd4 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -340,3 +340,6 @@
 [submodule "contrib/c-ares"]
 	path = contrib/c-ares
 	url = https://github.com/c-ares/c-ares.git
+[submodule "contrib/incbin"]
+	path = contrib/incbin
+	url = https://github.com/graphitemaster/incbin.git
diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt
index 0f68c0cbc7c..fdf6e60e58f 100644
--- a/contrib/CMakeLists.txt
+++ b/contrib/CMakeLists.txt
@@ -164,13 +164,13 @@ add_contrib (libpq-cmake libpq)
 add_contrib (nuraft-cmake NuRaft)
 add_contrib (fast_float-cmake fast_float)
 add_contrib (datasketches-cpp-cmake datasketches-cpp)
+add_contrib (incbin-cmake incbin)
 
 option(ENABLE_NLP "Enable NLP functions support" ${ENABLE_LIBRARIES})
 if (ENABLE_NLP)
     add_contrib (libstemmer-c-cmake libstemmer_c)
     add_contrib (wordnet-blast-cmake wordnet-blast)
     add_contrib (lemmagen-c-cmake lemmagen-c)
-    add_contrib (nlp-data-cmake nlp-data)
     add_contrib (cld2-cmake cld2)
 endif()
 
diff --git a/contrib/incbin b/contrib/incbin
new file mode 160000
index 00000000000..6e576cae5ab
--- /dev/null
+++ b/contrib/incbin
@@ -0,0 +1 @@
+Subproject commit 6e576cae5ab5810f25e2631f2e0b80cbe7dc8cbf
diff --git a/contrib/incbin-cmake/CMakeLists.txt b/contrib/incbin-cmake/CMakeLists.txt
new file mode 100644
index 00000000000..e64ebc99c73
--- /dev/null
+++ b/contrib/incbin-cmake/CMakeLists.txt
@@ -0,0 +1,4 @@
+set(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/incbin")
+add_library(_incbin INTERFACE)
+target_include_directories(_incbin SYSTEM INTERFACE ${LIBRARY_DIR})
+add_library(ch_contrib::incbin ALIAS _incbin)
diff --git a/contrib/nlp-data-cmake/CMakeLists.txt b/contrib/nlp-data-cmake/CMakeLists.txt
deleted file mode 100644
index 5380269c479..00000000000
--- a/contrib/nlp-data-cmake/CMakeLists.txt
+++ /dev/null
@@ -1,15 +0,0 @@
-include(${ClickHouse_SOURCE_DIR}/cmake/embed_binary.cmake)
-
-set(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/nlp-data")
-
-add_library (_nlp_data INTERFACE)
-
-clickhouse_embed_binaries(
-    TARGET nlp_dictionaries
-    RESOURCE_DIR "${LIBRARY_DIR}"
-    RESOURCES charset.zst tonality_ru.zst programming.zst
-)
-
-add_dependencies(_nlp_data nlp_dictionaries)
-target_link_libraries(_nlp_data INTERFACE "-Wl,${WHOLE_ARCHIVE} $<TARGET_FILE:nlp_dictionaries> -Wl,${NO_WHOLE_ARCHIVE}")
-add_library(ch_contrib::nlp_data ALIAS _nlp_data)
diff --git a/programs/install/CMakeLists.txt b/programs/install/CMakeLists.txt
index c3f4d96d631..f3f562bab7c 100644
--- a/programs/install/CMakeLists.txt
+++ b/programs/install/CMakeLists.txt
@@ -10,3 +10,6 @@ set (CLICKHOUSE_INSTALL_LINK
 )
 
 clickhouse_program_add_library(install)
+
+# For incbin
+target_include_directories(clickhouse-install-lib PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/../server")
diff --git a/programs/install/Install.cpp b/programs/install/Install.cpp
index d83e189f7ef..da2c95af62c 100644
--- a/programs/install/Install.cpp
+++ b/programs/install/Install.cpp
@@ -20,10 +20,7 @@
 #include <Common/formatReadable.h>
 #include <Common/Config/ConfigProcessor.h>
 #include <Common/OpenSSLHelpers.h>
-#include <base/hex.h>
-#include <Common/getResource.h>
 #include <base/sleep.h>
-#include <IO/ReadBufferFromFileDescriptor.h>
 #include <IO/WriteBufferFromFileDescriptor.h>
 #include <IO/ReadBufferFromFile.h>
 #include <IO/WriteBufferFromFile.h>
@@ -35,6 +32,12 @@
 
 #include <Poco/Util/XMLConfiguration.h>
 
+#include <incbin.h>
+
+/// Embedded configuration files used inside the install program
+INCBIN(resource_config_xml, "config.xml");
+INCBIN(resource_users_xml, "users.xml");
+
 
 /** This tool can be used to install ClickHouse without a deb/rpm/tgz package, having only "clickhouse" binary.
   * It also allows to avoid dependency on systemd, upstart, SysV init.
@@ -560,7 +563,7 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
 
         if (!fs::exists(main_config_file))
         {
-            std::string_view main_config_content = getResource("config.xml");
+            std::string_view main_config_content(reinterpret_cast<const char *>(gresource_config_xmlData), gresource_config_xmlSize);
             if (main_config_content.empty())
             {
                 fmt::print("There is no default config.xml, you have to download it and place to {}.\n", main_config_file.string());
@@ -672,7 +675,7 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
 
         if (!fs::exists(users_config_file))
         {
-            std::string_view users_config_content = getResource("users.xml");
+            std::string_view users_config_content(reinterpret_cast<const char *>(gresource_users_xmlData), gresource_users_xmlSize);
             if (users_config_content.empty())
             {
                 fmt::print("There is no default users.xml, you have to download it and place to {}.\n", users_config_file.string());
diff --git a/programs/keeper/CMakeLists.txt b/programs/keeper/CMakeLists.txt
index 940e6848597..317e35959aa 100644
--- a/programs/keeper/CMakeLists.txt
+++ b/programs/keeper/CMakeLists.txt
@@ -1,16 +1,3 @@
-include(${ClickHouse_SOURCE_DIR}/cmake/embed_binary.cmake)
-
-if (OS_LINUX)
-    set (LINK_RESOURCE_LIB INTERFACE "-Wl,${WHOLE_ARCHIVE} $<TARGET_FILE:clickhouse_keeper_configs> -Wl,${NO_WHOLE_ARCHIVE}")
-    # for some reason INTERFACE linkage doesn't work for standalone binary
-    set (LINK_RESOURCE_LIB_STANDALONE_KEEPER "-Wl,${WHOLE_ARCHIVE} $<TARGET_FILE:clickhouse_keeper_configs> -Wl,${NO_WHOLE_ARCHIVE}")
-endif ()
-
-clickhouse_embed_binaries(
-    TARGET clickhouse_keeper_configs
-    RESOURCES keeper_config.xml keeper_embedded.xml
-)
-
 set(CLICKHOUSE_KEEPER_SOURCES
     Keeper.cpp
 )
@@ -29,7 +16,6 @@ set (CLICKHOUSE_KEEPER_LINK
 clickhouse_program_add(keeper)
 
 install(FILES keeper_config.xml DESTINATION "${CLICKHOUSE_ETC_DIR}/clickhouse-keeper" COMPONENT clickhouse-keeper)
-add_dependencies(clickhouse-keeper-lib clickhouse_keeper_configs)
 
 if (BUILD_STANDALONE_KEEPER)
     # Straight list of all required sources
@@ -215,7 +201,6 @@ if (BUILD_STANDALONE_KEEPER)
         ${LINK_RESOURCE_LIB_STANDALONE_KEEPER}
     )
 
-    add_dependencies(clickhouse-keeper clickhouse_keeper_configs)
     set_target_properties(clickhouse-keeper PROPERTIES RUNTIME_OUTPUT_DIRECTORY ../)
 
     if (SPLIT_DEBUG_SYMBOLS)
diff --git a/programs/keeper/Keeper.cpp b/programs/keeper/Keeper.cpp
index 6034d63a016..a38467c3369 100644
--- a/programs/keeper/Keeper.cpp
+++ b/programs/keeper/Keeper.cpp
@@ -457,8 +457,10 @@ try
     const std::string key_path = config().getString("openSSL.server.privateKeyFile", "");
 
     std::vector<std::string> extra_paths = {include_from_path};
-    if (!cert_path.empty()) extra_paths.emplace_back(cert_path);
-    if (!key_path.empty()) extra_paths.emplace_back(key_path);
+    if (!cert_path.empty())
+        extra_paths.emplace_back(cert_path);
+    if (!key_path.empty())
+        extra_paths.emplace_back(key_path);
 
     /// ConfigReloader have to strict parameters which are redundant in our case
     auto main_config_reloader = std::make_unique<ConfigReloader>(
diff --git a/programs/server/CMakeLists.txt b/programs/server/CMakeLists.txt
index 855973d10e1..e008e65acf6 100644
--- a/programs/server/CMakeLists.txt
+++ b/programs/server/CMakeLists.txt
@@ -1,12 +1,8 @@
-include(${ClickHouse_SOURCE_DIR}/cmake/embed_binary.cmake)
-
 set(CLICKHOUSE_SERVER_SOURCES
     MetricsTransmitter.cpp
     Server.cpp
 )
 
-set (LINK_RESOURCE_LIB INTERFACE "-Wl,${WHOLE_ARCHIVE} $<TARGET_FILE:clickhouse_server_configs> -Wl,${NO_WHOLE_ARCHIVE}")
-
 set (CLICKHOUSE_SERVER_LINK
     PRIVATE
         clickhouse_aggregate_functions
@@ -33,10 +29,4 @@ endif()
 
 clickhouse_program_add(server)
 
-install(FILES config.xml users.xml DESTINATION "${CLICKHOUSE_ETC_DIR}/clickhouse-server" COMPONENT clickhouse)
-
-clickhouse_embed_binaries(
-    TARGET clickhouse_server_configs
-    RESOURCES config.xml users.xml embedded.xml play.html dashboard.html js/uplot.js
-)
-add_dependencies(clickhouse-server-lib clickhouse_server_configs)
+target_include_directories(clickhouse-server-lib PRIVATE ${CMAKE_CURRENT_SOURCE_DIR})
diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp
index 33fdcc9c1a8..229a169dc1e 100644
--- a/programs/server/Server.cpp
+++ b/programs/server/Server.cpp
@@ -128,6 +128,10 @@
 #   include <azure/storage/common/internal/xml_wrapper.hpp>
 #endif
 
+#include <incbin.h>
+/// A minimal file used when the server is run without installation
+INCBIN(resource_embedded_xml, "embedded.xml");
+
 namespace CurrentMetrics
 {
     extern const Metric Revision;
@@ -393,6 +397,7 @@ int Server::run()
 
 void Server::initialize(Poco::Util::Application & self)
 {
+    ConfigProcessor::registerEmbeddedConfig("config.xml", std::string_view(reinterpret_cast<const char *>(gresource_embedded_xmlData), gresource_embedded_xmlSize));
     BaseDaemon::initialize(self);
     logger().information("starting up");
 
@@ -1105,8 +1110,10 @@ try
     const std::string key_path = config().getString("openSSL.server.privateKeyFile", "");
 
     std::vector<std::string> extra_paths = {include_from_path};
-    if (!cert_path.empty()) extra_paths.emplace_back(cert_path);
-    if (!key_path.empty()) extra_paths.emplace_back(key_path);
+    if (!cert_path.empty())
+        extra_paths.emplace_back(cert_path);
+    if (!key_path.empty())
+        extra_paths.emplace_back(key_path);
 
     auto main_config_reloader = std::make_unique<ConfigReloader>(
         config_path,
diff --git a/programs/server/resources.cpp b/programs/server/resources.cpp
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index f870993f080..fda8bafde59 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -210,7 +210,7 @@ if (TARGET ch_contrib::jemalloc)
     target_link_libraries (clickhouse_storages_system PRIVATE ch_contrib::jemalloc)
 endif()
 
-target_link_libraries (clickhouse_common_io PUBLIC ch_contrib::sparsehash)
+target_link_libraries (clickhouse_common_io PUBLIC ch_contrib::sparsehash ch_contrib::incbin)
 
 add_subdirectory(Access/Common)
 add_subdirectory(Common/ZooKeeper)
@@ -296,7 +296,7 @@ macro (dbms_target_include_directories)
     endforeach ()
 endmacro ()
 
-dbms_target_include_directories (PUBLIC "${ClickHouse_SOURCE_DIR}/src" "${ClickHouse_BINARY_DIR}/src")
+dbms_target_include_directories (PUBLIC "${ClickHouse_SOURCE_DIR}/src" "${ClickHouse_BINARY_DIR}/src" "${ClickHouse_SOURCE_DIR}/programs/server")
 target_include_directories (clickhouse_common_io PUBLIC "${ClickHouse_SOURCE_DIR}/src" "${ClickHouse_BINARY_DIR}/src")
 
 if (TARGET ch_contrib::llvm)
@@ -561,7 +561,7 @@ if (ENABLE_NLP)
     dbms_target_link_libraries (PUBLIC ch_contrib::stemmer)
     dbms_target_link_libraries (PUBLIC ch_contrib::wnb)
     dbms_target_link_libraries (PUBLIC ch_contrib::lemmagen)
-    dbms_target_link_libraries (PUBLIC ch_contrib::nlp_data)
+    target_include_directories(clickhouse_common_io PUBLIC ${CMAKE_SOURCE_DIR}/contrib/nlp-data)
 endif()
 
 if (TARGET ch_contrib::ulid)
diff --git a/src/Common/CMakeLists.txt b/src/Common/CMakeLists.txt
index e527b3dec43..b83c8431f0a 100644
--- a/src/Common/CMakeLists.txt
+++ b/src/Common/CMakeLists.txt
@@ -9,5 +9,5 @@ if (ENABLE_EXAMPLES)
 endif()
 
 if (ENABLE_MYSQL)
-    add_subdirectory (mysqlxx)
+    add_subdirectory(mysqlxx)
 endif ()
diff --git a/src/Common/Config/ConfigProcessor.cpp b/src/Common/Config/ConfigProcessor.cpp
index 5bbc8eae0de..c3a8f69cf3f 100644
--- a/src/Common/Config/ConfigProcessor.cpp
+++ b/src/Common/Config/ConfigProcessor.cpp
@@ -83,6 +83,13 @@ ConfigProcessor::~ConfigProcessor()
         Poco::Logger::destroy("ConfigProcessor");
 }
 
+static std::unordered_map<std::string, std::string_view> embedded_configs;
+
+void ConfigProcessor::registerEmbeddedConfig(std::string name, std::string_view content)
+{
+    embedded_configs[name] = content;
+}
+
 
 /// Vector containing the name of the element and a sorted list of attribute names and values
 /// (except "remove" and "replace" attributes).
@@ -281,15 +288,15 @@ void ConfigProcessor::doIncludesRecursive(
         {
             std::string value = node->nodeValue();
 
-            bool replace_occured = false;
+            bool replace_occurred = false;
             size_t pos;
             while ((pos = value.find(substitution.first)) != std::string::npos)
             {
                 value.replace(pos, substitution.first.length(), substitution.second);
-                replace_occured = true;
+                replace_occurred = true;
             }
 
-            if (replace_occured)
+            if (replace_occurred)
                 node->setNodeValue(value);
         }
     }
@@ -528,26 +535,14 @@ XMLDocumentPtr ConfigProcessor::processConfig(
     }
     else
     {
-        /// These embedded files added during build with some cmake magic.
-        /// Look at the end of programs/server/CMakeLists.txt.
-        std::string embedded_name;
-        if (path == "config.xml")
-            embedded_name = "embedded.xml";
-
-        if (path == "keeper_config.xml")
-            embedded_name = "keeper_embedded.xml";
-
-        /// When we can use config embedded in binary.
-        if (!embedded_name.empty())
+        /// When we can use a config embedded in the binary.
+        if (auto it = embedded_configs.find(path); it != embedded_configs.end())
         {
-            auto resource = getResource(embedded_name);
-            if (resource.empty())
-                throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "Configuration file {} doesn't exist and there is no embedded config", path);
             LOG_DEBUG(log, "There is no file '{}', will use embedded config.", path);
-            config = dom_parser.parseMemory(resource.data(), resource.size());
+            config = dom_parser.parseMemory(it->second.data(), it->second.size());
         }
         else
-            throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "Configuration file {} doesn't exist", path);
+            throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "Configuration file {} doesn't exist and there is no embedded config", path);
     }
 
     std::vector<std::string> contributing_files;
diff --git a/src/Common/Config/ConfigProcessor.h b/src/Common/Config/ConfigProcessor.h
index 0ca3e46db88..eefe65ef06c 100644
--- a/src/Common/Config/ConfigProcessor.h
+++ b/src/Common/Config/ConfigProcessor.h
@@ -65,6 +65,9 @@ public:
         zkutil::ZooKeeperNodeCache * zk_node_cache = nullptr,
         const zkutil::EventPtr & zk_changed_event = nullptr);
 
+    /// These configurations will be used if there is no configuration file.
+    static void registerEmbeddedConfig(std::string name, std::string_view content);
+
 
     /// loadConfig* functions apply processConfig and create Poco::Util::XMLConfiguration.
     /// The resulting XML document is saved into a file with the name
diff --git a/src/Common/FrequencyHolder.cpp b/src/Common/FrequencyHolder.cpp
new file mode 100644
index 00000000000..3b755cacacb
--- /dev/null
+++ b/src/Common/FrequencyHolder.cpp
@@ -0,0 +1,181 @@
+#include <Common/FrequencyHolder.h>
+
+#include <incbin.h>
+
+/// Embedded zstd-compressed frequency dictionaries
+INCBIN(resource_charset_zst, "charset.zst");
+INCBIN(resource_tonality_ru_zst, "tonality_ru.zst");
+INCBIN(resource_programming_zst, "programming.zst");
+
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+    extern const int FILE_DOESNT_EXIST;
+}
+
+
+FrequencyHolder & FrequencyHolder::getInstance()
+{
+    static FrequencyHolder instance; /// Function-local static: constructed (and all dictionaries loaded) on the first call.
+    return instance;
+}
+
+FrequencyHolder::FrequencyHolder()
+{
+    loadEmotionalDict(); /// Reads the embedded `tonality_ru.zst` resource.
+    loadEncodingsFrequency(); /// Reads the embedded `charset.zst` resource.
+    loadProgrammingFrequency(); /// Reads the embedded `programming.zst` resource.
+}
+
+void FrequencyHolder::loadEncodingsFrequency()
+{
+    Poco::Logger * log = &Poco::Logger::get("EncodingsFrequency");
+    /// Fills `encodings_freq` from the zstd-compressed `charset.zst` resource embedded via INCBIN.
+    LOG_TRACE(log, "Loading embedded charset frequencies");
+
+    std::string_view resource(reinterpret_cast<const char *>(gresource_charset_zstData), gresource_charset_zstSize);
+    if (resource.empty())
+        throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "There is no embedded charset frequencies");
+
+    String line;
+    UInt16 bigram;
+    Float64 frequency;
+    String charset_name;
+
+    auto buf = std::make_unique<ReadBufferFromMemory>(resource.data(), resource.size());
+    ZstdInflatingReadBuffer in(std::move(buf)); /// Wraps the in-memory buffer so the text is read decompressed.
+
+    while (!in.eof())
+    {
+        readString(line, in);
+        in.ignore(); /// Skip one character after the line (presumably the line terminator).
+
+        if (line.empty())
+            continue;
+
+        ReadBufferFromString buf_line(line);
+
+        // A line starting with "// " is a header that begins a new charset section.
+        if (line.starts_with("// "))
+        {
+            // Skip "// "
+            buf_line.ignore(3);
+            readString(charset_name, buf_line);
+
+            /* Header lines in the dictionary have the form: <Language>_<Charset>
+            * The part before the first '_' is stored as the language (enc.lang),
+            * the part after it is stored as the charset name (enc.name).
+            */
+            size_t sep = charset_name.find('_');
+
+            Encoding enc;
+            enc.lang = charset_name.substr(0, sep);
+            enc.name = charset_name.substr(sep + 1);
+            encodings_freq.push_back(std::move(enc));
+        }
+        else
+        {
+            readIntText(bigram, buf_line); /// Data line: integer bigram code, one separator character, then the frequency.
+            buf_line.ignore();
+            readFloatText(frequency, buf_line);
+
+            encodings_freq.back().map[bigram] = frequency; /// NOTE(review): relies on a "// " header preceding the first data line.
+        }
+    }
+    LOG_TRACE(log, "Charset frequencies was added, charsets count: {}", encodings_freq.size());
+}
+
+void FrequencyHolder::loadEmotionalDict()
+{
+    Poco::Logger * log = &Poco::Logger::get("EmotionalDict");
+    LOG_TRACE(log, "Loading embedded emotional dictionary");
+    /// Fills `emotional_dict` from the zstd-compressed `tonality_ru.zst` resource embedded via INCBIN.
+    std::string_view resource(reinterpret_cast<const char *>(gresource_tonality_ru_zstData), gresource_tonality_ru_zstSize);
+    if (resource.empty())
+        throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "There is no embedded emotional dictionary");
+
+    String line;
+    String word;
+    Float64 tonality;
+    size_t count = 0;
+
+    auto buf = std::make_unique<ReadBufferFromMemory>(resource.data(), resource.size());
+    ZstdInflatingReadBuffer in(std::move(buf)); /// Wraps the in-memory buffer so the text is read decompressed.
+
+    while (!in.eof())
+    {
+        readString(line, in);
+        in.ignore(); /// Skip one character after the line (presumably the line terminator).
+
+        if (line.empty())
+            continue;
+
+        ReadBufferFromString buf_line(line);
+
+        readStringUntilWhitespace(word, buf_line); /// Each line: a word, one separator character, then its tonality.
+        buf_line.ignore();
+        readFloatText(tonality, buf_line);
+
+        StringRef ref{string_pool.insert(word.data(), word.size()), word.size()}; /// Key refers to the bytes returned by `string_pool`, not to the reused `word` buffer.
+        emotional_dict[ref] = tonality;
+        ++count; /// Counts parsed lines, so a repeated word would be counted each time.
+    }
+    LOG_TRACE(log, "Emotional dictionary was added. Word count: {}", std::to_string(count));
+}
+
+void FrequencyHolder::loadProgrammingFrequency()
+{
+    Poco::Logger * log = &Poco::Logger::get("ProgrammingFrequency");
+    /// Fills `programming_freq` from the zstd-compressed `programming.zst` resource embedded via INCBIN.
+    LOG_TRACE(log, "Loading embedded programming languages frequencies loading");
+
+    std::string_view resource(reinterpret_cast<const char *>(gresource_programming_zstData), gresource_programming_zstSize);
+    if (resource.empty())
+        throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "There is no embedded programming languages frequencies");
+
+    String line;
+    String bigram;
+    Float64 frequency;
+    String programming_language;
+
+    auto buf = std::make_unique<ReadBufferFromMemory>(resource.data(), resource.size());
+    ZstdInflatingReadBuffer in(std::move(buf)); /// Wraps the in-memory buffer so the text is read decompressed.
+
+    while (!in.eof())
+    {
+        readString(line, in);
+        in.ignore(); /// Skip one character after the line (presumably the line terminator).
+
+        if (line.empty())
+            continue;
+
+        ReadBufferFromString buf_line(line);
+
+        // A line starting with "// " is a header that begins a new language section.
+        if (line.starts_with("// "))
+        {
+            // Skip "// "
+            buf_line.ignore(3);
+            readString(programming_language, buf_line);
+
+            Language lang;
+            lang.name = programming_language;
+            programming_freq.push_back(std::move(lang));
+        }
+        else
+        {
+            readStringUntilWhitespace(bigram, buf_line); /// Data line: a bigram token, one separator character, then the frequency.
+            buf_line.ignore();
+            readFloatText(frequency, buf_line);
+
+            StringRef ref{string_pool.insert(bigram.data(), bigram.size()), bigram.size()}; /// Key refers to the bytes returned by `string_pool`, not to the reused `bigram` buffer.
+            programming_freq.back().map[ref] = frequency; /// NOTE(review): relies on a "// " header preceding the first data line.
+        }
+    }
+    LOG_TRACE(log, "Programming languages frequencies was added");
+}
+
+}
diff --git a/src/Common/FrequencyHolder.h b/src/Common/FrequencyHolder.h
index 74098598441..270e4dbbd2a 100644
--- a/src/Common/FrequencyHolder.h
+++ b/src/Common/FrequencyHolder.h
@@ -7,7 +7,6 @@
 #include <unordered_map>
 
 #include <Common/Arena.h>
-#include <Common/getResource.h>
 #include <Common/HashTable/HashMap.h>
 #include <Common/StringUtils/StringUtils.h>
 #include <IO/ReadBufferFromFile.h>
@@ -20,11 +19,6 @@
 namespace DB
 {
 
-namespace ErrorCodes
-{
-    extern const int FILE_DOESNT_EXIST;
-}
-
 /// FrequencyHolder class is responsible for storing and loading dictionaries
 /// needed for text classification functions:
 ///
@@ -56,11 +50,7 @@ public:
     using EncodingMap = HashMap<UInt16, Float64>;
     using EncodingContainer = std::vector<Encoding>;
 
-    static FrequencyHolder & getInstance()
-    {
-        static FrequencyHolder instance;
-        return instance;
-    }
+    static FrequencyHolder & getInstance();
 
     const Map & getEmotionalDict() const
     {
@@ -78,161 +68,11 @@ public:
     }
 
 private:
+    FrequencyHolder();
 
-    FrequencyHolder()
-    {
-        loadEmotionalDict();
-        loadEncodingsFrequency();
-        loadProgrammingFrequency();
-    }
-
-    void loadEncodingsFrequency()
-    {
-        Poco::Logger * log = &Poco::Logger::get("EncodingsFrequency");
-
-        LOG_TRACE(log, "Loading embedded charset frequencies");
-
-        auto resource = getResource("charset.zst");
-            if (resource.empty())
-                throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "There is no embedded charset frequencies");
-
-        String line;
-        UInt16 bigram;
-        Float64 frequency;
-        String charset_name;
-
-        auto buf = std::make_unique<ReadBufferFromMemory>(resource.data(), resource.size());
-        ZstdInflatingReadBuffer in(std::move(buf));
-
-        while (!in.eof())
-        {
-            readString(line, in);
-            in.ignore();
-
-            if (line.empty())
-                continue;
-
-            ReadBufferFromString buf_line(line);
-
-            // Start loading a new charset
-            if (line.starts_with("// "))
-            {
-                // Skip "// "
-                buf_line.ignore(3);
-                readString(charset_name, buf_line);
-
-                /* In our dictionary we have lines with form: <Language>_<Charset>
-                * If we need to find language of data, we return <Language>
-                * If we need to find charset of data, we return <Charset>.
-                */
-                size_t sep = charset_name.find('_');
-
-                Encoding enc;
-                enc.lang = charset_name.substr(0, sep);
-                enc.name = charset_name.substr(sep + 1);
-                encodings_freq.push_back(std::move(enc));
-            }
-            else
-            {
-                readIntText(bigram, buf_line);
-                buf_line.ignore();
-                readFloatText(frequency, buf_line);
-
-                encodings_freq.back().map[bigram] = frequency;
-            }
-        }
-        LOG_TRACE(log, "Charset frequencies was added, charsets count: {}", encodings_freq.size());
-    }
-
-    void loadEmotionalDict()
-    {
-        Poco::Logger * log = &Poco::Logger::get("EmotionalDict");
-        LOG_TRACE(log, "Loading embedded emotional dictionary");
-
-        auto resource = getResource("tonality_ru.zst");
-            if (resource.empty())
-                throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "There is no embedded emotional dictionary");
-
-        String line;
-        String word;
-        Float64 tonality;
-        size_t count = 0;
-
-        auto buf = std::make_unique<ReadBufferFromMemory>(resource.data(), resource.size());
-        ZstdInflatingReadBuffer in(std::move(buf));
-
-        while (!in.eof())
-        {
-            readString(line, in);
-            in.ignore();
-
-            if (line.empty())
-                continue;
-
-            ReadBufferFromString buf_line(line);
-
-            readStringUntilWhitespace(word, buf_line);
-            buf_line.ignore();
-            readFloatText(tonality, buf_line);
-
-            StringRef ref{string_pool.insert(word.data(), word.size()), word.size()};
-            emotional_dict[ref] = tonality;
-            ++count;
-        }
-        LOG_TRACE(log, "Emotional dictionary was added. Word count: {}", std::to_string(count));
-    }
-
-    void loadProgrammingFrequency()
-    {
-        Poco::Logger * log = &Poco::Logger::get("ProgrammingFrequency");
-
-        LOG_TRACE(log, "Loading embedded programming languages frequencies loading");
-
-        auto resource = getResource("programming.zst");
-            if (resource.empty())
-                throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "There is no embedded programming languages frequencies");
-
-        String line;
-        String bigram;
-        Float64 frequency;
-        String programming_language;
-
-        auto buf = std::make_unique<ReadBufferFromMemory>(resource.data(), resource.size());
-        ZstdInflatingReadBuffer in(std::move(buf));
-
-        while (!in.eof())
-        {
-            readString(line, in);
-            in.ignore();
-
-            if (line.empty())
-                continue;
-
-            ReadBufferFromString buf_line(line);
-
-            // Start loading a new language
-            if (line.starts_with("// "))
-            {
-                // Skip "// "
-                buf_line.ignore(3);
-                readString(programming_language, buf_line);
-
-                Language lang;
-                lang.name = programming_language;
-                programming_freq.push_back(std::move(lang));
-            }
-            else
-            {
-                readStringUntilWhitespace(bigram, buf_line);
-                buf_line.ignore();
-                readFloatText(frequency, buf_line);
-
-                StringRef ref{string_pool.insert(bigram.data(), bigram.size()), bigram.size()};
-                programming_freq.back().map[ref] = frequency;
-            }
-        }
-        LOG_TRACE(log, "Programming languages frequencies was added");
-    }
+    void loadEncodingsFrequency();
+    void loadEmotionalDict();
+    void loadProgrammingFrequency();
 
     Arena string_pool;
 
diff --git a/src/Daemon/BaseDaemon.cpp b/src/Daemon/BaseDaemon.cpp
index 3852ec5ada5..f61ca054b2a 100644
--- a/src/Daemon/BaseDaemon.cpp
+++ b/src/Daemon/BaseDaemon.cpp
@@ -38,7 +38,6 @@
 #include <base/coverage.h>
 #include <base/sleep.h>
 
-#include <IO/WriteBufferFromFile.h>
 #include <IO/WriteBufferFromFileDescriptorDiscardOnFailure.h>
 #include <IO/ReadBufferFromFileDescriptor.h>
 #include <IO/ReadHelpers.h>
diff --git a/src/Server/WebUIRequestHandler.cpp b/src/Server/WebUIRequestHandler.cpp
index 3997e0f19b6..cb9e8935d8c 100644
--- a/src/Server/WebUIRequestHandler.cpp
+++ b/src/Server/WebUIRequestHandler.cpp
@@ -6,10 +6,16 @@
 #include <Poco/Util/LayeredConfiguration.h>
 
 #include <IO/HTTPCommon.h>
-#include <Common/getResource.h>
 
 #include <re2/re2.h>
 
+#include <incbin.h>
+
+/// Embedded HTML pages and JavaScript
+INCBIN(resource_play_html, "play.html");
+INCBIN(resource_dashboard_html, "dashboard.html");
+INCBIN(resource_uplot_js, "js/uplot.js");
+
 
 namespace DB
 {
@@ -34,13 +40,13 @@ void WebUIRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServerR
     if (request.getURI().starts_with("/play"))
     {
         response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_OK);
-        *response.send() << getResource("play.html");
+        *response.send() << std::string_view(reinterpret_cast<const char *>(gresource_play_htmlData), gresource_play_htmlSize);
     }
     else if (request.getURI().starts_with("/dashboard"))
     {
         response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_OK);
 
-        std::string html(getResource("dashboard.html"));
+        std::string html(reinterpret_cast<const char *>(gresource_dashboard_htmlData), gresource_dashboard_htmlSize);
 
         /// Replace a link to external JavaScript file to embedded file.
         /// This allows to open the HTML without running a server and to host it on server.
@@ -55,7 +61,7 @@ void WebUIRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServerR
     else if (request.getURI() == "/js/uplot.js")
     {
         response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_OK);
-        *response.send() << getResource("js/uplot.js");
+        *response.send() << std::string_view(reinterpret_cast<const char *>(gresource_uplot_jsData), gresource_uplot_jsSize);
     }
     else
     {
diff --git a/src/Storages/System/CMakeLists.txt b/src/Storages/System/CMakeLists.txt
index 1d2a3de5101..6b7d1739e33 100644
--- a/src/Storages/System/CMakeLists.txt
+++ b/src/Storages/System/CMakeLists.txt
@@ -43,18 +43,9 @@ list (APPEND storages_system_sources ${GENERATED_TIMEZONES_SRC})
 # Overlength strings
 set_source_files_properties(${GENERATED_LICENSES_SRC} PROPERTIES COMPILE_FLAGS -w)
 
-include(${ClickHouse_SOURCE_DIR}/cmake/embed_binary.cmake)
-clickhouse_embed_binaries(
-        TARGET information_schema_metadata
-        RESOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/InformationSchema/"
-        RESOURCES schemata.sql tables.sql views.sql columns.sql
-)
-
 list (SORT storages_system_sources) # Reproducible build
 add_library(clickhouse_storages_system ${storages_system_sources})
 
-add_dependencies(clickhouse_storages_system information_schema_metadata)
-
 target_link_libraries(clickhouse_storages_system PRIVATE
     dbms
     common
@@ -62,5 +53,6 @@ target_link_libraries(clickhouse_storages_system PRIVATE
     clickhouse_common_zookeeper
     clickhouse_parsers
     Poco::JSON
-    INTERFACE "-Wl,${WHOLE_ARCHIVE} $<TARGET_FILE:information_schema_metadata> -Wl,${NO_WHOLE_ARCHIVE}"
 )
+
+target_include_directories(clickhouse_storages_system PRIVATE InformationSchema)
diff --git a/src/Storages/System/attachInformationSchemaTables.cpp b/src/Storages/System/attachInformationSchemaTables.cpp
index 61a91685324..bfc5c8c64e2 100644
--- a/src/Storages/System/attachInformationSchemaTables.cpp
+++ b/src/Storages/System/attachInformationSchemaTables.cpp
@@ -3,14 +3,21 @@
 #include <Storages/System/attachSystemTablesImpl.h>
 #include <Parsers/ParserCreateQuery.h>
 #include <Parsers/parseQuery.h>
-#include <Common/getResource.h>
+#include <incbin.h>
+
+/// Embedded SQL definitions
+INCBIN(resource_schemata_sql, "schemata.sql");
+INCBIN(resource_tables_sql, "tables.sql");
+INCBIN(resource_views_sql, "views.sql");
+INCBIN(resource_columns_sql, "columns.sql");
+
 
 namespace DB
 {
 
 /// View structures are taken from http://www.contrib.andrew.cmu.edu/~shadow/sql/sql1992.txt
 
-static void createInformationSchemaView(ContextMutablePtr context, IDatabase & database, const String & view_name)
+static void createInformationSchemaView(ContextMutablePtr context, IDatabase & database, const String & view_name, std::string_view query)
 {
     try
     {
@@ -21,12 +28,11 @@ static void createInformationSchemaView(ContextMutablePtr context, IDatabase & d
         bool is_uppercase = database.getDatabaseName() == DatabaseCatalog::INFORMATION_SCHEMA_UPPERCASE;
 
         String metadata_resource_name = view_name + ".sql";
-        auto attach_query = getResource(metadata_resource_name);
-        if (attach_query.empty())
+        if (query.empty())
             return;
 
         ParserCreateQuery parser;
-        ASTPtr ast = parseQuery(parser, attach_query.data(), attach_query.data() + attach_query.size(),
+        ASTPtr ast = parseQuery(parser, query.data(), query.data() + query.size(),
                                 "Attach query from embedded resource " + metadata_resource_name,
                                 DBMS_DEFAULT_MAX_QUERY_SIZE, DBMS_DEFAULT_MAX_PARSER_DEPTH);
 
@@ -50,10 +56,10 @@ static void createInformationSchemaView(ContextMutablePtr context, IDatabase & d
 
 void attachInformationSchema(ContextMutablePtr context, IDatabase & information_schema_database)
 {
-    createInformationSchemaView(context, information_schema_database, "schemata");
-    createInformationSchemaView(context, information_schema_database, "tables");
-    createInformationSchemaView(context, information_schema_database, "views");
-    createInformationSchemaView(context, information_schema_database, "columns");
+    createInformationSchemaView(context, information_schema_database, "schemata", std::string_view(reinterpret_cast<const char *>(gresource_schemata_sqlData), gresource_schemata_sqlSize));
+    createInformationSchemaView(context, information_schema_database, "tables", std::string_view(reinterpret_cast<const char *>(gresource_tables_sqlData), gresource_tables_sqlSize));
+    createInformationSchemaView(context, information_schema_database, "views", std::string_view(reinterpret_cast<const char *>(gresource_views_sqlData), gresource_views_sqlSize));
+    createInformationSchemaView(context, information_schema_database, "columns", std::string_view(reinterpret_cast<const char *>(gresource_columns_sqlData), gresource_columns_sqlSize));
 }
 
 }

From 4170d1458bdbccafe2f8cb2c671ee044b3efe9ba Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@clickhouse.com>
Date: Sun, 23 Jul 2023 05:25:14 +0200
Subject: [PATCH 192/242] Use incbin for resources, part 2

---
 cmake/embed_binary.cmake                      | 58 ------------------
 contrib/cctz-cmake/CMakeLists.txt             | 45 ++++++++------
 src/Common/Config/ConfigProcessor.cpp         |  1 -
 src/Common/DateLUTImpl.cpp                    | 17 ++++--
 src/Common/SymbolIndex.cpp                    | 61 +++----------------
 src/Common/SymbolIndex.h                      | 30 +--------
 src/Common/getResource.cpp                    | 52 ----------------
 src/Common/getResource.h                      |  7 ---
 src/Common/tests/gtest_DateLUTImpl.cpp        | 14 ++---
 .../System/StorageSystemTimeZones.cpp         |  7 ++-
 10 files changed, 58 insertions(+), 234 deletions(-)
 delete mode 100644 cmake/embed_binary.cmake
 delete mode 100644 src/Common/getResource.cpp
 delete mode 100644 src/Common/getResource.h

diff --git a/cmake/embed_binary.cmake b/cmake/embed_binary.cmake
deleted file mode 100644
index e5428c24939..00000000000
--- a/cmake/embed_binary.cmake
+++ /dev/null
@@ -1,58 +0,0 @@
-# Embed a set of resource files into a resulting object file.
-#
-# Signature: `clickhouse_embed_binaries(TARGET <target> RESOURCE_DIR <dir> RESOURCES <resource> ...)
-#
-# This will generate a static library target named `<target>`, which contains the contents of
-# each `<resource>` file. The files should be located in `<dir>`. <dir> defaults to
-# ${CMAKE_CURRENT_SOURCE_DIR}, and the resources may not be empty.
-#
-# Each resource will result in three symbols in the final archive, based on the name `<resource>`.
-# These are:
-#   1. `_binary_<name>_start`: Points to the start of the binary data from `<resource>`.
-#   2. `_binary_<name>_end`: Points to the end of the binary data from `<resource>`.
-#   2. `_binary_<name>_size`: Points to the size of the binary data from `<resource>`.
-#
-# `<name>` is a normalized name derived from `<resource>`, by replacing the characters "./-" with
-# the character "_", and the character "+" with "_PLUS_". This scheme is similar to those generated
-# by `ld -r -b binary`, and matches the expectations in `./base/common/getResource.cpp`.
-macro(clickhouse_embed_binaries)
-    set(one_value_args TARGET RESOURCE_DIR)
-    set(resources RESOURCES)
-    cmake_parse_arguments(EMBED "" "${one_value_args}" ${resources} ${ARGN})
-
-    if (NOT DEFINED EMBED_TARGET)
-        message(FATAL_ERROR "A target name must be provided for embedding binary resources into")
-    endif()
-
-    if (NOT DEFINED EMBED_RESOURCE_DIR)
-        set(EMBED_RESOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}")
-    endif()
-
-    list(LENGTH EMBED_RESOURCES N_RESOURCES)
-    if (N_RESOURCES LESS 1)
-        message(FATAL_ERROR "The list of binary resources to embed may not be empty")
-    endif()
-
-    add_library("${EMBED_TARGET}" STATIC)
-    set_target_properties("${EMBED_TARGET}" PROPERTIES LINKER_LANGUAGE C)
-
-    set(EMBED_TEMPLATE_FILE "${PROJECT_SOURCE_DIR}/programs/embed_binary.S.in")
-
-    foreach(RESOURCE_FILE ${EMBED_RESOURCES})
-        set(ASSEMBLY_FILE_NAME "${RESOURCE_FILE}.S")
-        set(BINARY_FILE_NAME "${RESOURCE_FILE}")
-
-        # Normalize the name of the resource.
-        string(REGEX REPLACE "[\./-]" "_" SYMBOL_NAME "${RESOURCE_FILE}") # - must be last in regex
-        string(REPLACE "+" "_PLUS_" SYMBOL_NAME "${SYMBOL_NAME}")
-
-        # Generate the configured assembly file in the output directory.
-        configure_file("${EMBED_TEMPLATE_FILE}" "${CMAKE_CURRENT_BINARY_DIR}/${ASSEMBLY_FILE_NAME}" @ONLY)
-
-        # Set the include directory for relative paths specified for `.incbin` directive.
-        set_property(SOURCE "${CMAKE_CURRENT_BINARY_DIR}/${ASSEMBLY_FILE_NAME}" APPEND PROPERTY INCLUDE_DIRECTORIES "${EMBED_RESOURCE_DIR}")
-
-        target_sources("${EMBED_TARGET}" PRIVATE "${CMAKE_CURRENT_BINARY_DIR}/${ASSEMBLY_FILE_NAME}")
-        set_target_properties("${EMBED_TARGET}" PROPERTIES OBJECT_DEPENDS "${RESOURCE_FILE}")
-    endforeach()
-endmacro()
diff --git a/contrib/cctz-cmake/CMakeLists.txt b/contrib/cctz-cmake/CMakeLists.txt
index 10070fbd949..8aa3c7886db 100644
--- a/contrib/cctz-cmake/CMakeLists.txt
+++ b/contrib/cctz-cmake/CMakeLists.txt
@@ -1,4 +1,3 @@
-include(${ClickHouse_SOURCE_DIR}/cmake/embed_binary.cmake)
 set(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/cctz")
 
 set (SRCS
@@ -23,12 +22,10 @@ if (OS_FREEBSD)
 endif ()
 
 # Related to time_zones table:
-# StorageSystemTimeZones.generated.cpp is autogenerated each time during a build
-# data in this file will be used to populate the system.time_zones table, this is specific to OS_LINUX
-# as the library that's built using embedded tzdata is also specific to OS_LINUX
-set(SYSTEM_STORAGE_TZ_FILE "${PROJECT_BINARY_DIR}/src/Storages/System/StorageSystemTimeZones.generated.cpp")
+# TimeZones.generated.cpp is autogenerated each time during a build
+set(TIMEZONES_FILE "${CMAKE_CURRENT_BINARY_DIR}/TimeZones.generated.cpp")
 # remove existing copies so that its generated fresh on each build.
-file(REMOVE ${SYSTEM_STORAGE_TZ_FILE})
+file(REMOVE ${TIMEZONES_FILE})
 
 # get the list of timezones from tzdata shipped with cctz
 set(TZDIR "${LIBRARY_DIR}/testdata/zoneinfo")
@@ -36,28 +33,36 @@ file(STRINGS "${LIBRARY_DIR}/testdata/version" TZDATA_VERSION)
 set_property(GLOBAL PROPERTY TZDATA_VERSION_PROP "${TZDATA_VERSION}")
 message(STATUS "Packaging with tzdata version: ${TZDATA_VERSION}")
 
-set(TIMEZONE_RESOURCE_FILES)
-
 # each file in that dir (except of tab and localtime) store the info about timezone
 execute_process(COMMAND
     bash -c "cd ${TZDIR} && find * -type f -and ! -name '*.tab' -and ! -name 'localtime' | LC_ALL=C sort | paste -sd ';' -"
     OUTPUT_STRIP_TRAILING_WHITESPACE
     OUTPUT_VARIABLE TIMEZONES)
 
-file(APPEND ${SYSTEM_STORAGE_TZ_FILE} "// autogenerated by ClickHouse/contrib/cctz-cmake/CMakeLists.txt\n")
-file(APPEND ${SYSTEM_STORAGE_TZ_FILE} "const char * auto_time_zones[] {\n" )
+file(APPEND ${TIMEZONES_FILE} "// autogenerated by ClickHouse/contrib/cctz-cmake/CMakeLists.txt\n")
+file(APPEND ${TIMEZONES_FILE} "#include <incbin.h>\n")
 
+set (COUNTER 1)
 foreach(TIMEZONE ${TIMEZONES})
-    file(APPEND ${SYSTEM_STORAGE_TZ_FILE} "    \"${TIMEZONE}\",\n")
-    list(APPEND TIMEZONE_RESOURCE_FILES "${TIMEZONE}")
+    file(APPEND ${TIMEZONES_FILE} "INCBIN(resource_timezone${COUNTER}, \"${TIMEZONE}\");\n")
+    MATH(EXPR COUNTER "${COUNTER}+1")
 endforeach(TIMEZONE)
-file(APPEND ${SYSTEM_STORAGE_TZ_FILE} "    nullptr};\n")
-clickhouse_embed_binaries(
-    TARGET tzdata
-    RESOURCE_DIR "${TZDIR}"
-    RESOURCES ${TIMEZONE_RESOURCE_FILES}
-)
-add_dependencies(_cctz tzdata)
-target_link_libraries(_cctz INTERFACE "-Wl,${WHOLE_ARCHIVE} $<TARGET_FILE:tzdata> -Wl,${NO_WHOLE_ARCHIVE}")
+
+file(APPEND ${TIMEZONES_FILE} "#include <cstddef>\n")
+file(APPEND ${TIMEZONES_FILE} "struct TimeZone { const char * name; const unsigned char * data; size_t size; };\n")
+file(APPEND ${TIMEZONES_FILE} "TimeZone auto_time_zones[] {\n" )
+
+set (COUNTER 1)
+foreach(TIMEZONE ${TIMEZONES})
+    file(APPEND ${TIMEZONES_FILE} "    {\"${TIMEZONE}\", gresource_timezone${COUNTER}Data, gresource_timezone${COUNTER}Size},\n")
+    MATH(EXPR COUNTER "${COUNTER}+1")
+endforeach(TIMEZONE)
+
+file(APPEND ${TIMEZONES_FILE} "    {nullptr, nullptr, 0}};\n")
+
+add_library (tzdata ${TIMEZONES_FILE})
+target_link_libraries(tzdata ch_contrib::incbin)
+target_include_directories(tzdata PRIVATE ${TZDIR})
+target_link_libraries(_cctz tzdata)
 
 add_library(ch_contrib::cctz ALIAS _cctz)
diff --git a/src/Common/Config/ConfigProcessor.cpp b/src/Common/Config/ConfigProcessor.cpp
index c3a8f69cf3f..bda181eceeb 100644
--- a/src/Common/Config/ConfigProcessor.cpp
+++ b/src/Common/Config/ConfigProcessor.cpp
@@ -19,7 +19,6 @@
 #include <Common/ZooKeeper/KeeperException.h>
 #include <Common/StringUtils/StringUtils.h>
 #include <Common/Exception.h>
-#include <Common/getResource.h>
 #include <Common/XMLUtils.h>
 #include <Common/logger_useful.h>
 #include <base/errnoToString.h>
diff --git a/src/Common/DateLUTImpl.cpp b/src/Common/DateLUTImpl.cpp
index 8146b35cc5f..3619462e79b 100644
--- a/src/Common/DateLUTImpl.cpp
+++ b/src/Common/DateLUTImpl.cpp
@@ -3,7 +3,6 @@
 #include <cctz/civil_time.h>
 #include <cctz/time_zone.h>
 #include <cctz/zone_info_source.h>
-#include <Common/getResource.h>
 #include <Poco/Exception.h>
 
 #include <algorithm>
@@ -13,6 +12,10 @@
 #include <memory>
 
 
+/// Embedded timezones.
+struct TimeZone { const char * name; const unsigned char * data; size_t size; };
+extern TimeZone auto_time_zones[];
+
 namespace
 {
 
@@ -249,9 +252,15 @@ namespace cctz_extension
             const std::string & name,
             const std::function<std::unique_ptr<cctz::ZoneInfoSource>(const std::string & name)> & fallback)
         {
-            std::string_view resource = getResource(name);
-            if (!resource.empty())
-                return std::make_unique<Source>(resource.data(), resource.size());
+            const TimeZone * timezone = auto_time_zones;
+            while (timezone->name != nullptr)
+            {
+                if (timezone->name == name)
+                    break;
+                ++timezone;
+            }
+            if (timezone->size)
+                return std::make_unique<Source>(reinterpret_cast<const char *>(timezone->data), timezone->size);
 
             return fallback(name);
         }
diff --git a/src/Common/SymbolIndex.cpp b/src/Common/SymbolIndex.cpp
index cb02bb3ff75..ac406538033 100644
--- a/src/Common/SymbolIndex.cpp
+++ b/src/Common/SymbolIndex.cpp
@@ -87,50 +87,13 @@ namespace
 /// https://stackoverflow.com/questions/32088140/multiple-string-tables-in-elf-object
 
 
-void updateResources(ElfW(Addr) base_address, std::string_view object_name, std::string_view name, const void * address, SymbolIndex::Resources & resources)
-{
-    const char * char_address = static_cast<const char *>(address);
-
-    if (name.starts_with("_binary_") || name.starts_with("binary_"))
-    {
-        if (name.ends_with("_start"))
-        {
-            name = name.substr((name[0] == '_') + strlen("binary_"));
-            name = name.substr(0, name.size() - strlen("_start"));
-
-            auto & resource = resources[name];
-            if (!resource.base_address || resource.base_address == base_address)
-            {
-                resource.base_address = base_address;
-                resource.start = std::string_view{char_address, 0}; // NOLINT(bugprone-string-constructor)
-                resource.object_name = object_name;
-            }
-        }
-        if (name.ends_with("_end"))
-        {
-            name = name.substr((name[0] == '_') + strlen("binary_"));
-            name = name.substr(0, name.size() - strlen("_end"));
-
-            auto & resource = resources[name];
-            if (!resource.base_address || resource.base_address == base_address)
-            {
-                resource.base_address = base_address;
-                resource.end = std::string_view{char_address, 0}; // NOLINT(bugprone-string-constructor)
-                resource.object_name = object_name;
-            }
-        }
-    }
-}
-
-
 /// Based on the code of musl-libc and the answer of Kanalpiroge on
 /// https://stackoverflow.com/questions/15779185/list-all-the-functions-symbols-on-the-fly-in-c-code-on-a-linux-architecture
 /// It does not extract all the symbols (but only public - exported and used for dynamic linking),
 /// but will work if we cannot find or parse ELF files.
 void collectSymbolsFromProgramHeaders(
     dl_phdr_info * info,
-    std::vector<SymbolIndex::Symbol> & symbols,
-    SymbolIndex::Resources & resources)
+    std::vector<SymbolIndex::Symbol> & symbols)
 {
     /* Iterate over all headers of the current shared lib
      * (first call is for the executable itself)
@@ -248,9 +211,6 @@ void collectSymbolsFromProgramHeaders(
                     /// We are not interested in empty symbols.
                     if (elf_sym[sym_index].st_size)
                         symbols.push_back(symbol);
-
-                    /// But resources can be represented by a pair of empty symbols (indicating their boundaries).
-                    updateResources(base_address, info->dlpi_name, symbol.name, symbol.address_begin, resources);
                 }
 
                 break;
@@ -281,8 +241,7 @@ void collectSymbolsFromELFSymbolTable(
     const Elf & elf,
     const Elf::Section & symbol_table,
     const Elf::Section & string_table,
-    std::vector<SymbolIndex::Symbol> & symbols,
-    SymbolIndex::Resources & resources)
+    std::vector<SymbolIndex::Symbol> & symbols)
 {
     /// Iterate symbol table.
     const ElfSym * symbol_table_entry = reinterpret_cast<const ElfSym *>(symbol_table.begin());
@@ -312,8 +271,6 @@ void collectSymbolsFromELFSymbolTable(
 
         if (symbol_table_entry->st_size)
             symbols.push_back(symbol);
-
-        updateResources(info->dlpi_addr, info->dlpi_name, symbol.name, symbol.address_begin, resources);
     }
 }
 
@@ -323,8 +280,7 @@ bool searchAndCollectSymbolsFromELFSymbolTable(
     const Elf & elf,
     unsigned section_header_type,
     const char * string_table_name,
-    std::vector<SymbolIndex::Symbol> & symbols,
-    SymbolIndex::Resources & resources)
+    std::vector<SymbolIndex::Symbol> & symbols)
 {
     std::optional<Elf::Section> symbol_table;
     std::optional<Elf::Section> string_table;
@@ -342,7 +298,7 @@ bool searchAndCollectSymbolsFromELFSymbolTable(
         return false;
     }
 
-    collectSymbolsFromELFSymbolTable(info, elf, *symbol_table, *string_table, symbols, resources);
+    collectSymbolsFromELFSymbolTable(info, elf, *symbol_table, *string_table, symbols);
     return true;
 }
 
@@ -351,7 +307,6 @@ void collectSymbolsFromELF(
     dl_phdr_info * info,
     std::vector<SymbolIndex::Symbol> & symbols,
     std::vector<SymbolIndex::Object> & objects,
-    SymbolIndex::Resources & resources,
     String & build_id)
 {
     String object_name;
@@ -462,11 +417,11 @@ void collectSymbolsFromELF(
     object.name = object_name;
     objects.push_back(std::move(object));
 
-    searchAndCollectSymbolsFromELFSymbolTable(info, *objects.back().elf, SHT_SYMTAB, ".strtab", symbols, resources);
+    searchAndCollectSymbolsFromELFSymbolTable(info, *objects.back().elf, SHT_SYMTAB, ".strtab", symbols);
 
     /// Unneeded if they were parsed from "program headers" of loaded objects.
 #if defined USE_MUSL
-    searchAndCollectSymbolsFromELFSymbolTable(info, *objects.back().elf, SHT_DYNSYM, ".dynstr", symbols, resources);
+    searchAndCollectSymbolsFromELFSymbolTable(info, *objects.back().elf, SHT_DYNSYM, ".dynstr", symbols);
 #endif
 }
 
@@ -479,8 +434,8 @@ int collectSymbols(dl_phdr_info * info, size_t, void * data_ptr)
 {
     SymbolIndex::Data & data = *reinterpret_cast<SymbolIndex::Data *>(data_ptr);
 
-    collectSymbolsFromProgramHeaders(info, data.symbols, data.resources);
-    collectSymbolsFromELF(info, data.symbols, data.objects, data.resources, data.build_id);
+    collectSymbolsFromProgramHeaders(info, data.symbols);
+    collectSymbolsFromELF(info, data.symbols, data.objects, data.build_id);
 
     /* Continue iterations */
     return 0;
diff --git a/src/Common/SymbolIndex.h b/src/Common/SymbolIndex.h
index 4fd108434d5..8c7b8971805 100644
--- a/src/Common/SymbolIndex.h
+++ b/src/Common/SymbolIndex.h
@@ -8,6 +8,7 @@
 #include <Common/Elf.h>
 #include <boost/noncopyable.hpp>
 
+
 namespace DB
 {
 
@@ -45,44 +46,15 @@ public:
     const std::vector<Symbol> & symbols() const { return data.symbols; }
     const std::vector<Object> & objects() const { return data.objects; }
 
-    std::string_view getResource(String name) const
-    {
-        if (auto it = data.resources.find(name); it != data.resources.end())
-            return it->second.data();
-        return {};
-    }
-
     /// The BuildID that is generated by compiler.
     String getBuildID() const { return data.build_id; }
     String getBuildIDHex() const;
 
-    struct ResourcesBlob
-    {
-        /// Symbol can be presented in multiple shared objects,
-        /// base_address will be used to compare only symbols from the same SO.
-        ElfW(Addr) base_address = 0;
-        /// Just a human name of the SO.
-        std::string_view object_name;
-        /// Data blob.
-        std::string_view start;
-        std::string_view end;
-
-        std::string_view data() const
-        {
-            assert(end.data() >= start.data());
-            return std::string_view{start.data(), static_cast<size_t>(end.data() - start.data())};
-        }
-    };
-    using Resources = std::unordered_map<std::string_view /* symbol name */, ResourcesBlob>;
-
     struct Data
     {
         std::vector<Symbol> symbols;
         std::vector<Object> objects;
         String build_id;
-
-        /// Resources (embedded binary data) are located by symbols in form of _binary_name_start and _binary_name_end.
-        Resources resources;
     };
 private:
     Data data;
diff --git a/src/Common/getResource.cpp b/src/Common/getResource.cpp
deleted file mode 100644
index 72ba24c2f44..00000000000
--- a/src/Common/getResource.cpp
+++ /dev/null
@@ -1,52 +0,0 @@
-#include "getResource.h"
-#include <dlfcn.h>
-#include <string>
-#include <boost/algorithm/string/replace.hpp>
-#include <Common/SymbolIndex.h>
-
-
-std::string_view getResource(std::string_view name)
-{
-    // Convert the resource file name into the form generated by `ld -r -b binary`.
-    std::string name_replaced(name);
-    std::replace(name_replaced.begin(), name_replaced.end(), '/', '_');
-    std::replace(name_replaced.begin(), name_replaced.end(), '-', '_');
-    std::replace(name_replaced.begin(), name_replaced.end(), '.', '_');
-    boost::replace_all(name_replaced, "+", "_PLUS_");
-
-#if defined USE_MUSL
-    /// If static linking is used, we cannot use dlsym and have to parse ELF symbol table by ourself.
-    return DB::SymbolIndex::instance().getResource(name_replaced);
-
-#else
-    // In most `dlsym(3)` APIs, one passes the symbol name as it appears via
-    // something like `nm` or `objdump -t`. For example, a symbol `_foo` would be
-    // looked up with the string `"_foo"`.
-    //
-    // Apple's linker is confusingly different. The NOTES on the man page for
-    // `dlsym(3)` claim that one looks up the symbol with "the name used in C
-    // source code". In this example, that would mean using the string `"foo"`.
-    // This apparently applies even in the case where the symbol did not originate
-    // from C source, such as the embedded binary resource files used here. So
-    // the symbol name must not have a leading `_` on Apple platforms. It's not
-    // clear how this applies to other symbols, such as those which _have_ a leading
-    // underscore in them by design, many leading underscores, etc.
-#if defined OS_DARWIN
-    std::string prefix = "binary_";
-#else
-    std::string prefix = "_binary_";
-#endif
-    std::string symbol_name_start = prefix + name_replaced + "_start";
-    std::string symbol_name_end = prefix + name_replaced + "_end";
-
-    const char * sym_start = reinterpret_cast<const char *>(dlsym(RTLD_DEFAULT, symbol_name_start.c_str()));
-    const char * sym_end = reinterpret_cast<const char *>(dlsym(RTLD_DEFAULT, symbol_name_end.c_str()));
-
-    if (sym_start && sym_end)
-    {
-        auto resource_size = static_cast<size_t>(std::distance(sym_start, sym_end));
-        return { sym_start, resource_size };
-    }
-    return {};
-#endif
-}
diff --git a/src/Common/getResource.h b/src/Common/getResource.h
deleted file mode 100644
index 8975cc7841e..00000000000
--- a/src/Common/getResource.h
+++ /dev/null
@@ -1,7 +0,0 @@
-#pragma once
-
-#include <string_view>
-
-/// Get resource from binary if exists. Otherwise return empty string view.
-/// Resources are data that is embedded into executable at link time.
-std::string_view getResource(std::string_view name);
diff --git a/src/Common/tests/gtest_DateLUTImpl.cpp b/src/Common/tests/gtest_DateLUTImpl.cpp
index 04f63403ec2..b09319c78d6 100644
--- a/src/Common/tests/gtest_DateLUTImpl.cpp
+++ b/src/Common/tests/gtest_DateLUTImpl.cpp
@@ -15,7 +15,8 @@
 #endif
 
 // All timezones present at build time and embedded into ClickHouse binary.
-extern const char * auto_time_zones[];
+struct TimeZone { const char * name; const unsigned char * data; size_t size; };
+extern TimeZone auto_time_zones[];
 
 namespace
 {
@@ -32,14 +33,14 @@ std::vector<const char*> allTimezones(bool with_weird_offsets = true)
 {
     std::vector<const char*> result;
 
-    const auto * timezone_name = auto_time_zones;
-    while (*timezone_name)
+    const TimeZone * timezone = auto_time_zones;
+    while (timezone->name)
     {
-        bool weird_offsets = (std::string_view(*timezone_name) == "Africa/Monrovia");
+        bool weird_offsets = (std::string_view(timezone->name) == "Africa/Monrovia");
 
         if (!weird_offsets || with_weird_offsets)
-            result.push_back(*timezone_name);
-        ++timezone_name;
+            result.push_back(timezone->name);
+        ++timezone;
     }
 
     return result;
@@ -548,4 +549,3 @@ INSTANTIATE_TEST_SUITE_P(AllTimezones_Year1970,
 //            {0, 0 + 11 * 3600 * 24 + 12, 11},
         }))
 );
-
diff --git a/src/Storages/System/StorageSystemTimeZones.cpp b/src/Storages/System/StorageSystemTimeZones.cpp
index dc3711812a6..41227ab7780 100644
--- a/src/Storages/System/StorageSystemTimeZones.cpp
+++ b/src/Storages/System/StorageSystemTimeZones.cpp
@@ -4,7 +4,8 @@
 #include <DataTypes/DataTypeString.h>
 
 
-extern const char * auto_time_zones[];
+struct TimeZone { const char * name; const unsigned char * data; size_t size; };
+extern TimeZone auto_time_zones[];
 
 namespace DB
 {
@@ -17,7 +18,7 @@ NamesAndTypesList StorageSystemTimeZones::getNamesAndTypes()
 
 void StorageSystemTimeZones::fillData(MutableColumns & res_columns, ContextPtr, const SelectQueryInfo &) const
 {
-    for (auto * it = auto_time_zones; *it; ++it)
-        res_columns[0]->insert(String(*it));
+    for (auto * it = auto_time_zones; it->name != nullptr; ++it)
+        res_columns[0]->insert(String(it->name));
 }
 }

From c8f8a23c71dc88ab53318be369ca17b528047b05 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@clickhouse.com>
Date: Sun, 23 Jul 2023 06:09:15 +0200
Subject: [PATCH 193/242] Fix errors

---
 contrib/cctz-cmake/CMakeLists.txt             | 21 +++++++++++++------
 src/Common/DateLUTImpl.cpp                    | 18 +++++++---------
 src/Common/tests/gtest_DateLUTImpl.cpp        | 13 ++++++------
 src/Storages/System/CMakeLists.txt            |  2 --
 .../System/StorageSystemTimeZones.cpp         |  7 +++----
 5 files changed, 31 insertions(+), 30 deletions(-)

diff --git a/contrib/cctz-cmake/CMakeLists.txt b/contrib/cctz-cmake/CMakeLists.txt
index 8aa3c7886db..7edeada6e59 100644
--- a/contrib/cctz-cmake/CMakeLists.txt
+++ b/contrib/cctz-cmake/CMakeLists.txt
@@ -48,17 +48,26 @@ foreach(TIMEZONE ${TIMEZONES})
     MATH(EXPR COUNTER "${COUNTER}+1")
 endforeach(TIMEZONE)
 
-file(APPEND ${TIMEZONES_FILE} "#include <cstddef>\n")
-file(APPEND ${TIMEZONES_FILE} "struct TimeZone { const char * name; const unsigned char * data; size_t size; };\n")
-file(APPEND ${TIMEZONES_FILE} "TimeZone auto_time_zones[] {\n" )
+file(APPEND ${TIMEZONES_FILE} "const char * auto_time_zones[] {\n" )
 
-set (COUNTER 1)
 foreach(TIMEZONE ${TIMEZONES})
-    file(APPEND ${TIMEZONES_FILE} "    {\"${TIMEZONE}\", gresource_timezone${COUNTER}Data, gresource_timezone${COUNTER}Size},\n")
+    file(APPEND ${TIMEZONES_FILE} "    \"${TIMEZONE}\",\n")
     MATH(EXPR COUNTER "${COUNTER}+1")
 endforeach(TIMEZONE)
 
-file(APPEND ${TIMEZONES_FILE} "    {nullptr, nullptr, 0}};\n")
+file(APPEND ${TIMEZONES_FILE} "};\n\n")
+
+file(APPEND ${TIMEZONES_FILE} "#include <string_view>\n\n")
+file(APPEND ${TIMEZONES_FILE} "std::string_view getTimeZone(const char * name)\n{\n" )
+
+set (COUNTER 1)
+foreach(TIMEZONE ${TIMEZONES})
+    file(APPEND ${TIMEZONES_FILE} "    if (std::string_view(\"${TIMEZONE}\") == name) return { reinterpret_cast<const char *>(gresource_timezone${COUNTER}Data), gresource_timezone${COUNTER}Size };\n")
+    MATH(EXPR COUNTER "${COUNTER}+1")
+endforeach(TIMEZONE)
+
+file(APPEND ${TIMEZONES_FILE} "    return {};\n")
+file(APPEND ${TIMEZONES_FILE} "}\n")
 
 add_library (tzdata ${TIMEZONES_FILE})
 target_link_libraries(tzdata ch_contrib::incbin)
diff --git a/src/Common/DateLUTImpl.cpp b/src/Common/DateLUTImpl.cpp
index 3619462e79b..d5e04238ef9 100644
--- a/src/Common/DateLUTImpl.cpp
+++ b/src/Common/DateLUTImpl.cpp
@@ -10,11 +10,12 @@
 #include <chrono>
 #include <cstring>
 #include <memory>
+#include <iostream>
 
 
 /// Embedded timezones.
-struct TimeZone { const char * name; const unsigned char * data; size_t size; };
-extern TimeZone auto_time_zones[];
+std::string_view getTimeZone(const char * name);
+
 
 namespace
 {
@@ -252,15 +253,10 @@ namespace cctz_extension
             const std::string & name,
             const std::function<std::unique_ptr<cctz::ZoneInfoSource>(const std::string & name)> & fallback)
         {
-            const TimeZone * timezone = auto_time_zones;
-            while (timezone->name != nullptr)
-            {
-                if (timezone->name == name)
-                    break;
-                ++timezone;
-            }
-            if (timezone->size)
-                return std::make_unique<Source>(reinterpret_cast<const char *>(timezone->data), timezone->size);
+            std::string_view tz_file = getTimeZone(name.data());
+
+            if (!tz_file.empty())
+                return std::make_unique<Source>(tz_file.data(), tz_file.size());
 
             return fallback(name);
         }
diff --git a/src/Common/tests/gtest_DateLUTImpl.cpp b/src/Common/tests/gtest_DateLUTImpl.cpp
index b09319c78d6..3d3a3f04941 100644
--- a/src/Common/tests/gtest_DateLUTImpl.cpp
+++ b/src/Common/tests/gtest_DateLUTImpl.cpp
@@ -15,8 +15,7 @@
 #endif
 
 // All timezones present at build time and embedded into ClickHouse binary.
-struct TimeZone { const char * name; const unsigned char * data; size_t size; };
-extern TimeZone auto_time_zones[];
+extern const char * auto_time_zones[];
 
 namespace
 {
@@ -33,14 +32,14 @@ std::vector<const char*> allTimezones(bool with_weird_offsets = true)
 {
     std::vector<const char*> result;
 
-    const TimeZone * timezone = auto_time_zones;
-    while (timezone->name)
+    const auto * timezone_name = auto_time_zones;
+    while (*timezone_name)
     {
-        bool weird_offsets = (std::string_view(timezone->name) == "Africa/Monrovia");
+        bool weird_offsets = (std::string_view(*timezone_name) == "Africa/Monrovia");
 
         if (!weird_offsets || with_weird_offsets)
-            result.push_back(timezone->name);
-        ++timezone;
+            result.push_back(*timezone_name);
+        ++timezone_name;
     }
 
     return result;
diff --git a/src/Storages/System/CMakeLists.txt b/src/Storages/System/CMakeLists.txt
index 6b7d1739e33..c3a2e726365 100644
--- a/src/Storages/System/CMakeLists.txt
+++ b/src/Storages/System/CMakeLists.txt
@@ -30,7 +30,6 @@ endif()
 add_dependencies(generate-source generate-contributors)
 
 set(GENERATED_LICENSES_SRC "${CMAKE_CURRENT_BINARY_DIR}/StorageSystemLicenses.generated.cpp")
-set(GENERATED_TIMEZONES_SRC "${CMAKE_CURRENT_BINARY_DIR}/StorageSystemTimeZones.generated.cpp")
 
 add_custom_command(
     OUTPUT StorageSystemLicenses.generated.cpp
@@ -38,7 +37,6 @@ add_custom_command(
     WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})
 
 list (APPEND storages_system_sources ${GENERATED_LICENSES_SRC})
-list (APPEND storages_system_sources ${GENERATED_TIMEZONES_SRC})
 
 # Overlength strings
 set_source_files_properties(${GENERATED_LICENSES_SRC} PROPERTIES COMPILE_FLAGS -w)
diff --git a/src/Storages/System/StorageSystemTimeZones.cpp b/src/Storages/System/StorageSystemTimeZones.cpp
index 41227ab7780..dc3711812a6 100644
--- a/src/Storages/System/StorageSystemTimeZones.cpp
+++ b/src/Storages/System/StorageSystemTimeZones.cpp
@@ -4,8 +4,7 @@
 #include <DataTypes/DataTypeString.h>
 
 
-struct TimeZone { const char * name; const unsigned char * data; size_t size; };
-extern TimeZone auto_time_zones[];
+extern const char * auto_time_zones[];
 
 namespace DB
 {
@@ -18,7 +17,7 @@ NamesAndTypesList StorageSystemTimeZones::getNamesAndTypes()
 
 void StorageSystemTimeZones::fillData(MutableColumns & res_columns, ContextPtr, const SelectQueryInfo &) const
 {
-    for (auto * it = auto_time_zones; it->name != nullptr; ++it)
-        res_columns[0]->insert(String(it->name));
+    for (auto * it = auto_time_zones; *it; ++it)
+        res_columns[0]->insert(String(*it));
 }
 }

From 8013cb1f784f6324b3c7b227499751dc7e666009 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Sun, 23 Jul 2023 08:46:44 +0200
Subject: [PATCH 194/242] Remove skip_startup_tables from
 IDatabase::loadStoredObjects()

Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com>
---
 src/Databases/DatabaseAtomic.cpp                | 5 ++---
 src/Databases/DatabaseAtomic.h                  | 2 +-
 src/Databases/DatabaseLazy.cpp                  | 3 +--
 src/Databases/DatabaseLazy.h                    | 2 +-
 src/Databases/DatabaseOrdinary.cpp              | 9 +--------
 src/Databases/DatabaseOrdinary.h                | 2 +-
 src/Databases/DatabaseReplicated.cpp            | 5 ++---
 src/Databases/DatabaseReplicated.h              | 2 +-
 src/Databases/IDatabase.h                       | 3 +--
 src/Databases/MySQL/DatabaseMySQL.cpp           | 2 +-
 src/Databases/MySQL/DatabaseMySQL.h             | 2 +-
 src/Databases/PostgreSQL/DatabasePostgreSQL.cpp | 2 +-
 src/Databases/PostgreSQL/DatabasePostgreSQL.h   | 2 +-
 src/Databases/TablesLoader.cpp                  | 2 +-
 14 files changed, 16 insertions(+), 27 deletions(-)

diff --git a/src/Databases/DatabaseAtomic.cpp b/src/Databases/DatabaseAtomic.cpp
index 7e20b6f6535..0f65069db35 100644
--- a/src/Databases/DatabaseAtomic.cpp
+++ b/src/Databases/DatabaseAtomic.cpp
@@ -441,11 +441,10 @@ void DatabaseAtomic::beforeLoadingMetadata(ContextMutablePtr /*context*/, Loadin
     }
 }
 
-void DatabaseAtomic::loadStoredObjects(
-    ContextMutablePtr local_context, LoadingStrictnessLevel mode, bool skip_startup_tables)
+void DatabaseAtomic::loadStoredObjects(ContextMutablePtr local_context, LoadingStrictnessLevel mode)
 {
     beforeLoadingMetadata(local_context, mode);
-    DatabaseOrdinary::loadStoredObjects(local_context, mode, skip_startup_tables);
+    DatabaseOrdinary::loadStoredObjects(local_context, mode);
 }
 
 void DatabaseAtomic::startupTables(ThreadPool & thread_pool, LoadingStrictnessLevel mode)
diff --git a/src/Databases/DatabaseAtomic.h b/src/Databases/DatabaseAtomic.h
index cb275812098..70553b2d5c2 100644
--- a/src/Databases/DatabaseAtomic.h
+++ b/src/Databases/DatabaseAtomic.h
@@ -48,7 +48,7 @@ public:
 
     DatabaseTablesIteratorPtr getTablesIterator(ContextPtr context, const FilterByNameFunction & filter_by_table_name) const override;
 
-    void loadStoredObjects(ContextMutablePtr context, LoadingStrictnessLevel mode, bool skip_startup_tables) override;
+    void loadStoredObjects(ContextMutablePtr context, LoadingStrictnessLevel mode) override;
 
     void beforeLoadingMetadata(ContextMutablePtr context, LoadingStrictnessLevel mode) override;
 
diff --git a/src/Databases/DatabaseLazy.cpp b/src/Databases/DatabaseLazy.cpp
index f27c6c0c3ee..896ae99656f 100644
--- a/src/Databases/DatabaseLazy.cpp
+++ b/src/Databases/DatabaseLazy.cpp
@@ -37,8 +37,7 @@ DatabaseLazy::DatabaseLazy(const String & name_, const String & metadata_path_,
 }
 
 
-void DatabaseLazy::loadStoredObjects(
-    ContextMutablePtr local_context, LoadingStrictnessLevel /*mode*/, bool /* skip_startup_tables */)
+void DatabaseLazy::loadStoredObjects(ContextMutablePtr local_context, LoadingStrictnessLevel /*mode*/)
 {
     iterateMetadataFiles(local_context, [this, &local_context](const String & file_name)
     {
diff --git a/src/Databases/DatabaseLazy.h b/src/Databases/DatabaseLazy.h
index b01038073ef..2b1b119754d 100644
--- a/src/Databases/DatabaseLazy.h
+++ b/src/Databases/DatabaseLazy.h
@@ -26,7 +26,7 @@ public:
 
     bool canContainDistributedTables() const override { return false; }
 
-    void loadStoredObjects(ContextMutablePtr context, LoadingStrictnessLevel /*mode*/, bool skip_startup_tables) override;
+    void loadStoredObjects(ContextMutablePtr context, LoadingStrictnessLevel /*mode*/) override;
 
     void createTable(
         ContextPtr context,
diff --git a/src/Databases/DatabaseOrdinary.cpp b/src/Databases/DatabaseOrdinary.cpp
index 8c92b8064ca..51d37b84e14 100644
--- a/src/Databases/DatabaseOrdinary.cpp
+++ b/src/Databases/DatabaseOrdinary.cpp
@@ -89,8 +89,7 @@ DatabaseOrdinary::DatabaseOrdinary(
 {
 }
 
-void DatabaseOrdinary::loadStoredObjects(
-    ContextMutablePtr local_context, LoadingStrictnessLevel mode, bool skip_startup_tables)
+void DatabaseOrdinary::loadStoredObjects(ContextMutablePtr local_context, LoadingStrictnessLevel mode)
 {
     /** Tables load faster if they are loaded in sorted (by name) order.
       * Otherwise (for the ext4 filesystem), `DirectoryIterator` iterates through them in some order,
@@ -159,12 +158,6 @@ void DatabaseOrdinary::loadStoredObjects(
     }
 
     pool.wait();
-
-    if (!skip_startup_tables)
-    {
-        /// After all tables was basically initialized, startup them.
-        startupTables(pool, mode);
-    }
 }
 
 void DatabaseOrdinary::loadTablesMetadata(ContextPtr local_context, ParsedTablesMetadata & metadata, bool is_startup)
diff --git a/src/Databases/DatabaseOrdinary.h b/src/Databases/DatabaseOrdinary.h
index f9aa3214ef5..cabc8f9c55b 100644
--- a/src/Databases/DatabaseOrdinary.h
+++ b/src/Databases/DatabaseOrdinary.h
@@ -21,7 +21,7 @@ public:
 
     String getEngineName() const override { return "Ordinary"; }
 
-    void loadStoredObjects(ContextMutablePtr context, LoadingStrictnessLevel mode, bool skip_startup_tables) override;
+    void loadStoredObjects(ContextMutablePtr context, LoadingStrictnessLevel mode) override;
 
     bool supportsLoadingInTopologicalOrder() const override { return true; }
 
diff --git a/src/Databases/DatabaseReplicated.cpp b/src/Databases/DatabaseReplicated.cpp
index 25c23e2be17..d3b3d4b545f 100644
--- a/src/Databases/DatabaseReplicated.cpp
+++ b/src/Databases/DatabaseReplicated.cpp
@@ -495,11 +495,10 @@ void DatabaseReplicated::beforeLoadingMetadata(ContextMutablePtr /*context*/, Lo
     tryConnectToZooKeeperAndInitDatabase(mode);
 }
 
-void DatabaseReplicated::loadStoredObjects(
-    ContextMutablePtr local_context, LoadingStrictnessLevel mode, bool skip_startup_tables)
+void DatabaseReplicated::loadStoredObjects(ContextMutablePtr local_context, LoadingStrictnessLevel mode)
 {
     beforeLoadingMetadata(local_context, mode);
-    DatabaseAtomic::loadStoredObjects(local_context, mode, skip_startup_tables);
+    DatabaseAtomic::loadStoredObjects(local_context, mode);
 }
 
 UInt64 DatabaseReplicated::getMetadataHash(const String & table_name) const
diff --git a/src/Databases/DatabaseReplicated.h b/src/Databases/DatabaseReplicated.h
index ff1a4aba41c..8e33f482ac1 100644
--- a/src/Databases/DatabaseReplicated.h
+++ b/src/Databases/DatabaseReplicated.h
@@ -67,7 +67,7 @@ public:
 
     void drop(ContextPtr /*context*/) override;
 
-    void loadStoredObjects(ContextMutablePtr context, LoadingStrictnessLevel mode, bool skip_startup_tables) override;
+    void loadStoredObjects(ContextMutablePtr context, LoadingStrictnessLevel mode) override;
 
     void beforeLoadingMetadata(ContextMutablePtr context, LoadingStrictnessLevel mode) override;
 
diff --git a/src/Databases/IDatabase.h b/src/Databases/IDatabase.h
index a9577dfc84a..9bed3c4bfc5 100644
--- a/src/Databases/IDatabase.h
+++ b/src/Databases/IDatabase.h
@@ -134,8 +134,7 @@ public:
     /// You can call only once, right after the object is created.
     virtual void loadStoredObjects( /// NOLINT
         ContextMutablePtr /*context*/,
-        LoadingStrictnessLevel /*mode*/,
-        bool /* skip_startup_tables */)
+        LoadingStrictnessLevel /*mode*/)
     {
     }
 
diff --git a/src/Databases/MySQL/DatabaseMySQL.cpp b/src/Databases/MySQL/DatabaseMySQL.cpp
index 70bd32efed9..94e5ba1773e 100644
--- a/src/Databases/MySQL/DatabaseMySQL.cpp
+++ b/src/Databases/MySQL/DatabaseMySQL.cpp
@@ -402,7 +402,7 @@ String DatabaseMySQL::getMetadataPath() const
     return metadata_path;
 }
 
-void DatabaseMySQL::loadStoredObjects(ContextMutablePtr, LoadingStrictnessLevel /*mode*/, bool /* skip_startup_tables */)
+void DatabaseMySQL::loadStoredObjects(ContextMutablePtr, LoadingStrictnessLevel /*mode*/)
 {
 
     std::lock_guard lock{mutex};
diff --git a/src/Databases/MySQL/DatabaseMySQL.h b/src/Databases/MySQL/DatabaseMySQL.h
index f34a2fff4f7..e5b1f434d2f 100644
--- a/src/Databases/MySQL/DatabaseMySQL.h
+++ b/src/Databases/MySQL/DatabaseMySQL.h
@@ -76,7 +76,7 @@ public:
 
     void createTable(ContextPtr, const String & table_name, const StoragePtr & storage, const ASTPtr & create_query) override;
 
-    void loadStoredObjects(ContextMutablePtr, LoadingStrictnessLevel /*mode*/, bool skip_startup_tables) override;
+    void loadStoredObjects(ContextMutablePtr, LoadingStrictnessLevel /*mode*/) override;
 
     StoragePtr detachTable(ContextPtr context, const String & table_name) override;
 
diff --git a/src/Databases/PostgreSQL/DatabasePostgreSQL.cpp b/src/Databases/PostgreSQL/DatabasePostgreSQL.cpp
index f4d750f85d4..812a0d8717e 100644
--- a/src/Databases/PostgreSQL/DatabasePostgreSQL.cpp
+++ b/src/Databases/PostgreSQL/DatabasePostgreSQL.cpp
@@ -296,7 +296,7 @@ void DatabasePostgreSQL::drop(ContextPtr /*context*/)
 }
 
 
-void DatabasePostgreSQL::loadStoredObjects(ContextMutablePtr /* context */, LoadingStrictnessLevel /*mode*/, bool /* skip_startup_tables */)
+void DatabasePostgreSQL::loadStoredObjects(ContextMutablePtr /* context */, LoadingStrictnessLevel /*mode*/)
 {
     {
         std::lock_guard lock{mutex};
diff --git a/src/Databases/PostgreSQL/DatabasePostgreSQL.h b/src/Databases/PostgreSQL/DatabasePostgreSQL.h
index 31fa036c0ee..d731e06649b 100644
--- a/src/Databases/PostgreSQL/DatabasePostgreSQL.h
+++ b/src/Databases/PostgreSQL/DatabasePostgreSQL.h
@@ -44,7 +44,7 @@ public:
 
     bool empty() const override;
 
-    void loadStoredObjects(ContextMutablePtr, LoadingStrictnessLevel /*mode*/, bool skip_startup_tables) override;
+    void loadStoredObjects(ContextMutablePtr, LoadingStrictnessLevel /*mode*/) override;
 
     DatabaseTablesIteratorPtr getTablesIterator(ContextPtr context, const FilterByNameFunction & filter_by_table_name) const override;
 
diff --git a/src/Databases/TablesLoader.cpp b/src/Databases/TablesLoader.cpp
index ea0f2072430..f8b4e7fe33b 100644
--- a/src/Databases/TablesLoader.cpp
+++ b/src/Databases/TablesLoader.cpp
@@ -49,7 +49,7 @@ void TablesLoader::loadTables()
         if (need_resolve_dependencies && database.second->supportsLoadingInTopologicalOrder())
             databases_to_load.push_back(database.first);
         else
-            database.second->loadStoredObjects(global_context, strictness_mode, /* skip_startup_tables */ true);
+            database.second->loadStoredObjects(global_context, strictness_mode);
     }
 
     if (databases_to_load.empty())

From 282258a855cfed40e0b2cd7c0ada3ec1defe8e06 Mon Sep 17 00:00:00 2001
From: Han Fei <hanfei19910905@gmail.com>
Date: Sun, 23 Jul 2023 11:29:29 +0200
Subject: [PATCH 195/242] fix style

---
 src/Common/OptimizedRegularExpression.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/Common/OptimizedRegularExpression.cpp b/src/Common/OptimizedRegularExpression.cpp
index e636b0b987d..05e6aefbb5e 100644
--- a/src/Common/OptimizedRegularExpression.cpp
+++ b/src/Common/OptimizedRegularExpression.cpp
@@ -1,5 +1,6 @@
 #include <limits>
 #include <Common/Exception.h>
+#include <Common/logger_useful.h>
 #include <Common/PODArray.h>
 #include <Common/checkStackSize.h>
 #include <Common/OptimizedRegularExpression.h>
@@ -439,7 +440,7 @@ catch (...)
     is_trivial = false;
     required_substring_is_prefix = false;
     alternatives.clear();
-    std::cerr << "Analyze RegularExpression failed, got error: {}" << DB::getCurrentExceptionMessage(false) << "\n";
+    LOG_ERROR(&Poco::Logger::get("OptimizeRegularExpression"), "Analyze RegularExpression failed, got error: {}", DB::getCurrentExceptionMessage(false));
 }
 
 template <bool thread_safe>

From 4c1f8f38cd4073b24064e076a677082db546c680 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@clickhouse.com>
Date: Sun, 23 Jul 2023 17:30:23 +0200
Subject: [PATCH 196/242] Fix CI

---
 docker/test/fasttest/run.sh | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docker/test/fasttest/run.sh b/docker/test/fasttest/run.sh
index e25b5fdbfed..60e6199aaa4 100755
--- a/docker/test/fasttest/run.sh
+++ b/docker/test/fasttest/run.sh
@@ -147,6 +147,7 @@ function clone_submodules
             contrib/simdjson
             contrib/liburing
             contrib/libfiu
+            contrib/incbin
         )
 
         git submodule sync

From 8902bbdb60b466498ab2825000502195d5d35c91 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@clickhouse.com>
Date: Sun, 23 Jul 2023 17:59:49 +0200
Subject: [PATCH 197/242] Fix fasttest

---
 src/Common/FrequencyHolder.cpp                    | 4 ++++
 src/Common/FrequencyHolder.h                      | 6 ++++++
 src/Functions/FunctionsCharsetClassification.cpp  | 2 +-
 src/Functions/FunctionsLanguageClassification.cpp | 4 +---
 4 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/src/Common/FrequencyHolder.cpp b/src/Common/FrequencyHolder.cpp
index 3b755cacacb..fe03e6a1b44 100644
--- a/src/Common/FrequencyHolder.cpp
+++ b/src/Common/FrequencyHolder.cpp
@@ -1,5 +1,7 @@
 #include <Common/FrequencyHolder.h>
 
+#if USE_NLP
+
 #include <incbin.h>
 
 /// Embedded SQL definitions
@@ -179,3 +181,5 @@ void FrequencyHolder::loadProgrammingFrequency()
 }
 
 }
+
+#endif
diff --git a/src/Common/FrequencyHolder.h b/src/Common/FrequencyHolder.h
index 270e4dbbd2a..73675ed9814 100644
--- a/src/Common/FrequencyHolder.h
+++ b/src/Common/FrequencyHolder.h
@@ -1,5 +1,9 @@
 #pragma once
 
+#include "config.h"
+
+#if USE_NLP
+
 #include <base/StringRef.h>
 #include <Common/logger_useful.h>
 
@@ -81,3 +85,5 @@ private:
     EncodingContainer encodings_freq;
 };
 }
+
+#endif
diff --git a/src/Functions/FunctionsCharsetClassification.cpp b/src/Functions/FunctionsCharsetClassification.cpp
index a25da8f6c13..237d4c37fa2 100644
--- a/src/Functions/FunctionsCharsetClassification.cpp
+++ b/src/Functions/FunctionsCharsetClassification.cpp
@@ -3,7 +3,7 @@
 #include <Functions/FunctionsTextClassification.h>
 
 #include <memory>
-#include <unordered_map>
+
 
 namespace DB
 {
diff --git a/src/Functions/FunctionsLanguageClassification.cpp b/src/Functions/FunctionsLanguageClassification.cpp
index 6088fd52efa..55485d41ce0 100644
--- a/src/Functions/FunctionsLanguageClassification.cpp
+++ b/src/Functions/FunctionsLanguageClassification.cpp
@@ -5,19 +5,17 @@
 #include <Columns/ColumnMap.h>
 #include <Columns/ColumnArray.h>
 #include <Columns/ColumnString.h>
-#include <Columns/ColumnsNumber.h>
 #include <Common/isValidUTF8.h>
 #include <DataTypes/DataTypeMap.h>
 #include <DataTypes/DataTypeString.h>
-#include <DataTypes/DataTypeTuple.h>
 #include <DataTypes/DataTypesNumber.h>
 #include <Functions/FunctionHelpers.h>
 #include <Functions/FunctionFactory.h>
 #include <Functions/FunctionsTextClassification.h>
-#include <Interpreters/Context.h>
 
 #include <compact_lang_det.h>
 
+
 namespace DB
 {
 /* Determine language of Unicode UTF-8 text.

From 43bd6d1b8336f282cc4548c0f61b52516f49ac13 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@clickhouse.com>
Date: Sun, 23 Jul 2023 19:00:49 +0300
Subject: [PATCH 198/242] Revert "Add an ability to specify allocations size
 for sampling memory profiler"

---
 programs/server/Server.cpp                    | 21 +++--------
 src/Common/MemoryTracker.cpp                  | 10 +----
 src/Common/MemoryTracker.h                    | 18 ---------
 src/Core/ServerSettings.h                     |  8 +---
 src/Core/Settings.h                           |  4 +-
 src/Interpreters/ProcessList.cpp              |  3 --
 src/Interpreters/ThreadStatusExt.cpp          |  2 -
 .../__init__.py                               |  1 -
 .../configs/max_untracked_memory.xml          |  7 ----
 .../configs/memory_profiler.xml               |  5 ---
 .../test.py                                   | 37 -------------------
 ...r_sample_min_max_allocation_size.reference |  1 -
 ...profiler_sample_min_max_allocation_size.sh | 18 ---------
 13 files changed, 11 insertions(+), 124 deletions(-)
 delete mode 100644 tests/integration/test_memory_profiler_min_max_borders/__init__.py
 delete mode 100644 tests/integration/test_memory_profiler_min_max_borders/configs/max_untracked_memory.xml
 delete mode 100644 tests/integration/test_memory_profiler_min_max_borders/configs/memory_profiler.xml
 delete mode 100644 tests/integration/test_memory_profiler_min_max_borders/test.py
 delete mode 100644 tests/queries/0_stateless/02818_memory_profiler_sample_min_max_allocation_size.reference
 delete mode 100755 tests/queries/0_stateless/02818_memory_profiler_sample_min_max_allocation_size.sh

diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp
index 33fdcc9c1a8..9202d4b32c1 100644
--- a/programs/server/Server.cpp
+++ b/programs/server/Server.cpp
@@ -1637,26 +1637,17 @@ try
         global_context->initializeTraceCollector();
 
         /// Set up server-wide memory profiler (for total memory tracker).
-        if (server_settings.total_memory_profiler_step)
+        UInt64 total_memory_profiler_step = config().getUInt64("total_memory_profiler_step", 0);
+        if (total_memory_profiler_step)
         {
-            total_memory_tracker.setProfilerStep(server_settings.total_memory_profiler_step);
+            total_memory_tracker.setProfilerStep(total_memory_profiler_step);
         }
 
-        if (server_settings.total_memory_tracker_sample_probability > 0.0)
+        double total_memory_tracker_sample_probability = config().getDouble("total_memory_tracker_sample_probability", 0);
+        if (total_memory_tracker_sample_probability > 0.0)
         {
-            total_memory_tracker.setSampleProbability(server_settings.total_memory_tracker_sample_probability);
+            total_memory_tracker.setSampleProbability(total_memory_tracker_sample_probability);
         }
-
-        if (server_settings.total_memory_profiler_sample_min_allocation_size)
-        {
-            total_memory_tracker.setSampleMinAllocationSize(server_settings.total_memory_profiler_sample_min_allocation_size);
-        }
-
-        if (server_settings.total_memory_profiler_sample_max_allocation_size)
-        {
-            total_memory_tracker.setSampleMaxAllocationSize(server_settings.total_memory_profiler_sample_max_allocation_size);
-        }
-
     }
 #endif
 
diff --git a/src/Common/MemoryTracker.cpp b/src/Common/MemoryTracker.cpp
index 52cae0768dc..81cac2617c5 100644
--- a/src/Common/MemoryTracker.cpp
+++ b/src/Common/MemoryTracker.cpp
@@ -229,7 +229,7 @@ void MemoryTracker::allocImpl(Int64 size, bool throw_if_memory_exceeded, MemoryT
     }
 
     std::bernoulli_distribution sample(sample_probability);
-    if (unlikely(sample_probability > 0.0 && isSizeOkForSampling(size) && sample(thread_local_rng)))
+    if (unlikely(sample_probability > 0.0 && sample(thread_local_rng)))
     {
         MemoryTrackerBlockerInThread untrack_lock(VariableContext::Global);
         DB::TraceSender::send(DB::TraceType::MemorySample, StackTrace(), {.size = size});
@@ -413,7 +413,7 @@ void MemoryTracker::free(Int64 size)
     }
 
     std::bernoulli_distribution sample(sample_probability);
-    if (unlikely(sample_probability > 0.0 && isSizeOkForSampling(size) && sample(thread_local_rng)))
+    if (unlikely(sample_probability > 0.0 && sample(thread_local_rng)))
     {
         MemoryTrackerBlockerInThread untrack_lock(VariableContext::Global);
         DB::TraceSender::send(DB::TraceType::MemorySample, StackTrace(), {.size = -size});
@@ -534,12 +534,6 @@ void MemoryTracker::setOrRaiseProfilerLimit(Int64 value)
         ;
 }
 
-bool MemoryTracker::isSizeOkForSampling(UInt64 size) const
-{
-    /// We can avoid comparison min_allocation_size_bytes with zero, because we cannot have 0 bytes allocation/deallocation
-    return ((max_allocation_size_bytes == 0 || size <= max_allocation_size_bytes) && size >= min_allocation_size_bytes);
-}
-
 bool canEnqueueBackgroundTask()
 {
     auto limit = background_memory_tracker.getSoftLimit();
diff --git a/src/Common/MemoryTracker.h b/src/Common/MemoryTracker.h
index 768dc8a7404..4e29d40c953 100644
--- a/src/Common/MemoryTracker.h
+++ b/src/Common/MemoryTracker.h
@@ -67,12 +67,6 @@ private:
     /// To randomly sample allocations and deallocations in trace_log.
     double sample_probability = 0;
 
-    /// Randomly sample allocations only larger or equal to this size
-    UInt64 min_allocation_size_bytes = 0;
-
-    /// Randomly sample allocations only smaller or equal to this size
-    UInt64 max_allocation_size_bytes = 0;
-
     /// Singly-linked list. All information will be passed to subsequent memory trackers also (it allows to implement trackers hierarchy).
     /// In terms of tree nodes it is the list of parents. Lifetime of these trackers should "include" lifetime of current tracker.
     std::atomic<MemoryTracker *> parent {};
@@ -94,8 +88,6 @@ private:
 
     void setOrRaiseProfilerLimit(Int64 value);
 
-    bool isSizeOkForSampling(UInt64 size) const;
-
     /// allocImpl(...) and free(...) should not be used directly
     friend struct CurrentMemoryTracker;
     void allocImpl(Int64 size, bool throw_if_memory_exceeded, MemoryTracker * query_tracker = nullptr);
@@ -173,16 +165,6 @@ public:
         sample_probability = value;
     }
 
-    void setSampleMinAllocationSize(UInt64 value)
-    {
-        min_allocation_size_bytes = value;
-    }
-
-    void setSampleMaxAllocationSize(UInt64 value)
-    {
-        max_allocation_size_bytes = value;
-    }
-
     void setProfilerStep(Int64 value)
     {
         profiler_step = value;
diff --git a/src/Core/ServerSettings.h b/src/Core/ServerSettings.h
index f7a6c9e950e..1a9f226041b 100644
--- a/src/Core/ServerSettings.h
+++ b/src/Core/ServerSettings.h
@@ -81,12 +81,8 @@ namespace DB
     M(UInt64, background_schedule_pool_size, 128, "The maximum number of threads that will be used for constantly executing some lightweight periodic operations.", 0) \
     M(UInt64, background_message_broker_schedule_pool_size, 16, "The maximum number of threads that will be used for executing background operations for message streaming.", 0) \
     M(UInt64, background_distributed_schedule_pool_size, 16, "The maximum number of threads that will be used for executing distributed sends.", 0) \
-    M(Bool, display_secrets_in_show_and_select, false, "Allow showing secrets in SHOW and SELECT queries via a format setting and a grant", 0) \
-    \
-    M(UInt64, total_memory_profiler_step, 0, "Whenever server memory usage becomes larger than every next step in number of bytes the memory profiler will collect the allocating stack trace. Zero means disabled memory profiler. Values lower than a few megabytes will slow down server.", 0) \
-    M(Double, total_memory_tracker_sample_probability, 0, "Collect random allocations and deallocations and write them into system.trace_log with 'MemorySample' trace_type. The probability is for every alloc/free regardless to the size of the allocation (can be changed with `memory_profiler_sample_min_allocation_size` and `memory_profiler_sample_max_allocation_size`). Note that sampling happens only when the amount of untracked memory exceeds 'max_untracked_memory'. You may want to set 'max_untracked_memory' to 0 for extra fine grained sampling.", 0) \
-    M(UInt64, total_memory_profiler_sample_min_allocation_size, 0, "Collect random allocations of size greater or equal than specified value with probability equal to `total_memory_profiler_sample_probability`. 0 means disabled. You may want to set 'max_untracked_memory' to 0 to make this threshold to work as expected.", 0) \
-    M(UInt64, total_memory_profiler_sample_max_allocation_size, 0, "Collect random allocations of size less or equal than specified value with probability equal to `total_memory_profiler_sample_probability`. 0 means disabled. You may want to set 'max_untracked_memory' to 0 to make this threshold to work as expected.", 0)
+    M(Bool, display_secrets_in_show_and_select, false, "Allow showing secrets in SHOW and SELECT queries via a format setting and a grant", 0)
+
 
 DECLARE_SETTINGS_TRAITS(ServerSettingsTraits, SERVER_SETTINGS)
 
diff --git a/src/Core/Settings.h b/src/Core/Settings.h
index 4fc93500910..24be644ee55 100644
--- a/src/Core/Settings.h
+++ b/src/Core/Settings.h
@@ -427,9 +427,7 @@ class IColumn;
     M(UInt64, memory_overcommit_ratio_denominator_for_user, 1_GiB, "It represents soft memory limit on the global level. This value is used to compute query overcommit ratio.", 0) \
     M(UInt64, max_untracked_memory, (4 * 1024 * 1024), "Small allocations and deallocations are grouped in thread local variable and tracked or profiled only when amount (in absolute value) becomes larger than specified value. If the value is higher than 'memory_profiler_step' it will be effectively lowered to 'memory_profiler_step'.", 0) \
     M(UInt64, memory_profiler_step, (4 * 1024 * 1024), "Whenever query memory usage becomes larger than every next step in number of bytes the memory profiler will collect the allocating stack trace. Zero means disabled memory profiler. Values lower than a few megabytes will slow down query processing.", 0) \
-    M(Float, memory_profiler_sample_probability, 0., "Collect random allocations and deallocations and write them into system.trace_log with 'MemorySample' trace_type. The probability is for every alloc/free regardless to the size of the allocation (can be changed with `memory_profiler_sample_min_allocation_size` and `memory_profiler_sample_max_allocation_size`). Note that sampling happens only when the amount of untracked memory exceeds 'max_untracked_memory'. You may want to set 'max_untracked_memory' to 0 for extra fine grained sampling.", 0) \
-    M(UInt64, memory_profiler_sample_min_allocation_size, 0, "Collect random allocations of size greater or equal than specified value with probability equal to `memory_profiler_sample_probability`. 0 means disabled. You may want to set 'max_untracked_memory' to 0 to make this threshold to work as expected.", 0) \
-    M(UInt64, memory_profiler_sample_max_allocation_size, 0, "Collect random allocations of size less or equal than specified value with probability equal to `memory_profiler_sample_probability`. 0 means disabled. You may want to set 'max_untracked_memory' to 0 to make this threshold to work as expected.", 0) \
+    M(Float, memory_profiler_sample_probability, 0., "Collect random allocations and deallocations and write them into system.trace_log with 'MemorySample' trace_type. The probability is for every alloc/free regardless to the size of the allocation. Note that sampling happens only when the amount of untracked memory exceeds 'max_untracked_memory'. You may want to set 'max_untracked_memory' to 0 for extra fine grained sampling.", 0) \
     M(Bool, trace_profile_events, false, "Send to system.trace_log profile event and value of increment on each increment with 'ProfileEvent' trace_type", 0) \
     \
     M(UInt64, memory_usage_overcommit_max_wait_microseconds, 5'000'000, "Maximum time thread will wait for memory to be freed in the case of memory overcommit. If timeout is reached and memory is not freed, exception is thrown.", 0) \
diff --git a/src/Interpreters/ProcessList.cpp b/src/Interpreters/ProcessList.cpp
index c299572ef41..1503e396298 100644
--- a/src/Interpreters/ProcessList.cpp
+++ b/src/Interpreters/ProcessList.cpp
@@ -223,10 +223,7 @@ ProcessList::insert(const String & query_, const IAST * ast, ContextMutablePtr q
             {
                 /// Set up memory profiling
                 thread_group->memory_tracker.setProfilerStep(settings.memory_profiler_step);
-
                 thread_group->memory_tracker.setSampleProbability(settings.memory_profiler_sample_probability);
-                thread_group->memory_tracker.setSampleMinAllocationSize(settings.memory_profiler_sample_min_allocation_size);
-                thread_group->memory_tracker.setSampleMaxAllocationSize(settings.memory_profiler_sample_max_allocation_size);
                 thread_group->performance_counters.setTraceProfileEvents(settings.trace_profile_events);
             }
 
diff --git a/src/Interpreters/ThreadStatusExt.cpp b/src/Interpreters/ThreadStatusExt.cpp
index bac16c05533..398bea26b87 100644
--- a/src/Interpreters/ThreadStatusExt.cpp
+++ b/src/Interpreters/ThreadStatusExt.cpp
@@ -83,8 +83,6 @@ ThreadGroupPtr ThreadGroup::createForBackgroundProcess(ContextPtr storage_contex
     const Settings & settings = storage_context->getSettingsRef();
     group->memory_tracker.setProfilerStep(settings.memory_profiler_step);
     group->memory_tracker.setSampleProbability(settings.memory_profiler_sample_probability);
-    group->memory_tracker.setSampleMinAllocationSize(settings.memory_profiler_sample_min_allocation_size);
-    group->memory_tracker.setSampleMaxAllocationSize(settings.memory_profiler_sample_max_allocation_size);
     group->memory_tracker.setSoftLimit(settings.memory_overcommit_ratio_denominator);
     group->memory_tracker.setParent(&background_memory_tracker);
     if (settings.memory_tracker_fault_probability > 0.0)
diff --git a/tests/integration/test_memory_profiler_min_max_borders/__init__.py b/tests/integration/test_memory_profiler_min_max_borders/__init__.py
deleted file mode 100644
index e5a0d9b4834..00000000000
--- a/tests/integration/test_memory_profiler_min_max_borders/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-#!/usr/bin/env python3
diff --git a/tests/integration/test_memory_profiler_min_max_borders/configs/max_untracked_memory.xml b/tests/integration/test_memory_profiler_min_max_borders/configs/max_untracked_memory.xml
deleted file mode 100644
index 56fc5ed34ca..00000000000
--- a/tests/integration/test_memory_profiler_min_max_borders/configs/max_untracked_memory.xml
+++ /dev/null
@@ -1,7 +0,0 @@
-<clickhouse>
-    <profiles>
-        <default>
-            <max_untracked_memory>1</max_untracked_memory>
-        </default>
-    </profiles>
-</clickhouse>
diff --git a/tests/integration/test_memory_profiler_min_max_borders/configs/memory_profiler.xml b/tests/integration/test_memory_profiler_min_max_borders/configs/memory_profiler.xml
deleted file mode 100644
index 5b3e17d145f..00000000000
--- a/tests/integration/test_memory_profiler_min_max_borders/configs/memory_profiler.xml
+++ /dev/null
@@ -1,5 +0,0 @@
-<clickhouse>
-    <total_memory_tracker_sample_probability>1</total_memory_tracker_sample_probability>
-    <total_memory_profiler_sample_min_allocation_size>4096</total_memory_profiler_sample_min_allocation_size>
-    <total_memory_profiler_sample_max_allocation_size>8192</total_memory_profiler_sample_max_allocation_size>
-</clickhouse>
diff --git a/tests/integration/test_memory_profiler_min_max_borders/test.py b/tests/integration/test_memory_profiler_min_max_borders/test.py
deleted file mode 100644
index 6ab971fa9c4..00000000000
--- a/tests/integration/test_memory_profiler_min_max_borders/test.py
+++ /dev/null
@@ -1,37 +0,0 @@
-from helpers.cluster import ClickHouseCluster
-import pytest
-
-cluster = ClickHouseCluster(__file__)
-node = cluster.add_instance(
-    "node",
-    main_configs=["configs/memory_profiler.xml"],
-    user_configs=["configs/max_untracked_memory.xml"],
-)
-
-
-@pytest.fixture(scope="module")
-def started_cluster():
-    try:
-        cluster.start()
-        yield cluster
-
-    finally:
-        cluster.shutdown()
-
-
-def test_trace_boundaries_work(started_cluster):
-    node.query("select randomPrintableASCII(number) from numbers(1000) FORMAT Null")
-    node.query("SYSTEM FLUSH LOGS")
-
-    assert (
-        node.query(
-            "SELECT countDistinct(abs(size)) > 0 FROM system.trace_log where trace_type = 'MemorySample'"
-        )
-        == "1\n"
-    )
-    assert (
-        node.query(
-            "SELECT count() FROM system.trace_log where trace_type = 'MemorySample' and (abs(size) > 8192 or abs(size) < 4096)"
-        )
-        == "0\n"
-    )
diff --git a/tests/queries/0_stateless/02818_memory_profiler_sample_min_max_allocation_size.reference b/tests/queries/0_stateless/02818_memory_profiler_sample_min_max_allocation_size.reference
deleted file mode 100644
index d00491fd7e5..00000000000
--- a/tests/queries/0_stateless/02818_memory_profiler_sample_min_max_allocation_size.reference
+++ /dev/null
@@ -1 +0,0 @@
-1
diff --git a/tests/queries/0_stateless/02818_memory_profiler_sample_min_max_allocation_size.sh b/tests/queries/0_stateless/02818_memory_profiler_sample_min_max_allocation_size.sh
deleted file mode 100755
index b1fbea26da7..00000000000
--- a/tests/queries/0_stateless/02818_memory_profiler_sample_min_max_allocation_size.sh
+++ /dev/null
@@ -1,18 +0,0 @@
-#!/usr/bin/env bash
-# Tags: no-tsan, no-asan, no-ubsan, no-msan, no-cpu-aarch64, no-random-settings
-# requires TraceCollector, does not available under sanitizers and aarch64
-
-CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
-# shellcheck source=../shell_config.sh
-. "$CURDIR"/../shell_config.sh
-
-query_id="${CLICKHOUSE_DATABASE}_min_max_allocation_size_$RANDOM$RANDOM"
-${CLICKHOUSE_CLIENT} --query_id="$query_id" --memory_profiler_sample_min_allocation_size=4096 --memory_profiler_sample_max_allocation_size=8192 --log_queries=1 --max_threads=1 --max_untracked_memory=0 --memory_profiler_sample_probability=1 --query "select randomPrintableASCII(number) from numbers(1000) FORMAT Null"
-
-${CLICKHOUSE_CLIENT} --query "SYSTEM FLUSH LOGS"
-
-# at least something allocated
-${CLICKHOUSE_CLIENT} --query "SELECT countDistinct(abs(size)) > 0 FROM system.trace_log where query_id='$query_id' and trace_type = 'MemorySample'"
-
-# show wrong allocations
-${CLICKHOUSE_CLIENT} --query "SELECT abs(size) FROM system.trace_log where query_id='$query_id' and trace_type = 'MemorySample' and (abs(size) > 8192 or abs(size) < 4096)"

From e56e1ebd5d8fbb808867c1f98e421383acf38b1f Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@clickhouse.com>
Date: Sun, 23 Jul 2023 18:29:07 +0200
Subject: [PATCH 199/242] Fix fasttest

---
 src/Functions/FunctionsCharsetClassification.cpp | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/src/Functions/FunctionsCharsetClassification.cpp b/src/Functions/FunctionsCharsetClassification.cpp
index 237d4c37fa2..7704e3eafc0 100644
--- a/src/Functions/FunctionsCharsetClassification.cpp
+++ b/src/Functions/FunctionsCharsetClassification.cpp
@@ -1,4 +1,9 @@
 #include <Common/FrequencyHolder.h>
+
+#include "config.h"
+
+#if USE_NLP
+
 #include <Functions/FunctionFactory.h>
 #include <Functions/FunctionsTextClassification.h>
 
@@ -150,3 +155,5 @@ REGISTER_FUNCTION(DetectCharset)
 }
 
 }
+
+#endif

From 039cac69cf6d30cc58c8531b1efac4d9847cb599 Mon Sep 17 00:00:00 2001
From: Smita Kulkarni <Smita.Kulkarni@clickhouse.com>
Date: Sun, 23 Jul 2023 18:35:37 +0200
Subject: [PATCH 200/242] Fix test_insert_same_partition_and_merge by
 increasing wait time

---
 tests/integration/test_merge_tree_azure_blob_storage/test.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/integration/test_merge_tree_azure_blob_storage/test.py b/tests/integration/test_merge_tree_azure_blob_storage/test.py
index 761b5257a34..86b70f8db70 100644
--- a/tests/integration/test_merge_tree_azure_blob_storage/test.py
+++ b/tests/integration/test_merge_tree_azure_blob_storage/test.py
@@ -215,7 +215,7 @@ def test_insert_same_partition_and_merge(cluster, merge_vertical):
         if attempt == 59:
             assert parts_count == "(1)"
 
-        time.sleep(1)
+        time.sleep(10)
 
     assert azure_query(node, f"SELECT sum(id) FROM {TABLE_NAME} FORMAT Values") == "(0)"
     assert (

From 311b3adf89b9d54c4b3bf40feb4179d967ed3d2e Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@clickhouse.com>
Date: Sun, 23 Jul 2023 20:10:41 +0200
Subject: [PATCH 201/242] Fix fasttest

---
 src/Functions/FunctionsCharsetClassification.cpp     | 2 --
 src/Functions/FunctionsProgrammingClassification.cpp | 5 +++++
 src/Functions/FunctionsTonalityClassification.cpp    | 5 +++++
 3 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/src/Functions/FunctionsCharsetClassification.cpp b/src/Functions/FunctionsCharsetClassification.cpp
index 7704e3eafc0..05b173e3d95 100644
--- a/src/Functions/FunctionsCharsetClassification.cpp
+++ b/src/Functions/FunctionsCharsetClassification.cpp
@@ -1,7 +1,5 @@
 #include <Common/FrequencyHolder.h>
 
-#include "config.h"
-
 #if USE_NLP
 
 #include <Functions/FunctionFactory.h>
diff --git a/src/Functions/FunctionsProgrammingClassification.cpp b/src/Functions/FunctionsProgrammingClassification.cpp
index 8a552a30e65..a93e1d9a87d 100644
--- a/src/Functions/FunctionsProgrammingClassification.cpp
+++ b/src/Functions/FunctionsProgrammingClassification.cpp
@@ -1,4 +1,7 @@
 #include <Common/FrequencyHolder.h>
+
+#if USE_NLP
+
 #include <Common/StringUtils/StringUtils.h>
 #include <Functions/FunctionFactory.h>
 #include <Functions/FunctionsTextClassification.h>
@@ -118,3 +121,5 @@ REGISTER_FUNCTION(DetectProgrammingLanguage)
 }
 
 }
+
+#endif
diff --git a/src/Functions/FunctionsTonalityClassification.cpp b/src/Functions/FunctionsTonalityClassification.cpp
index e39f9c63758..3de38d99c88 100644
--- a/src/Functions/FunctionsTonalityClassification.cpp
+++ b/src/Functions/FunctionsTonalityClassification.cpp
@@ -1,4 +1,7 @@
 #include <Common/FrequencyHolder.h>
+
+#if USE_NLP
+
 #include <Common/StringUtils/StringUtils.h>
 #include <Functions/FunctionFactory.h>
 #include <Functions/FunctionsTextClassification.h>
@@ -87,3 +90,5 @@ REGISTER_FUNCTION(DetectTonality)
 }
 
 }
+
+#endif

From 49f4ef6ffb9264d8b4a31c8e4ab683f01afd4268 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@clickhouse.com>
Date: Sun, 23 Jul 2023 20:11:24 +0200
Subject: [PATCH 202/242] Fix typo

---
 src/Functions/FunctionsCharsetClassification.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Functions/FunctionsCharsetClassification.cpp b/src/Functions/FunctionsCharsetClassification.cpp
index 05b173e3d95..0a332ab70a9 100644
--- a/src/Functions/FunctionsCharsetClassification.cpp
+++ b/src/Functions/FunctionsCharsetClassification.cpp
@@ -49,7 +49,7 @@ namespace
         return res;
     }
 
-    /// Сount how many times each bigram occurs in the text.
+    /// Count how many times each bigram occurs in the text.
     template <typename ModelMap>
     ALWAYS_INLINE inline void calculateStats(
         const UInt8 * data,

From e21a4c4c9a3f50436b8e708b6a38cdf8eee3c6be Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@clickhouse.com>
Date: Sun, 23 Jul 2023 20:57:16 +0200
Subject: [PATCH 203/242] Fix the test

---
 .../02415_all_new_functions_must_be_documented.reference      | 4 ----
 .../02415_all_new_functions_must_be_documented.sql            | 4 +++-
 2 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference
index 595ebb483d5..b7097ad329b 100644
--- a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference
+++ b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference
@@ -238,10 +238,6 @@ defaultValueOfArgumentType
 defaultValueOfTypeName
 degrees
 demangle
-detectCharset
-detectLanguageUnknown
-detectProgrammingLanguage
-detectTonality
 divide
 dotProduct
 dumpColumnStructure
diff --git a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.sql b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.sql
index ed95c06d016..4f40da6c626 100644
--- a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.sql
+++ b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.sql
@@ -15,5 +15,7 @@ AND name NOT IN (
     'h3ToGeoBoundary', 'h3ToParent', 'h3ToString', 'h3UnidirectionalEdgeIsValid', 'h3kRing', 'stringToH3',
     'geoToS2', 's2CapContains', 's2CapUnion', 's2CellsIntersect', 's2GetNeighbors', 's2RectAdd', 's2RectContains', 's2RectIntersection', 's2RectUnion', 's2ToGeo',
     'normalizeUTF8NFC', 'normalizeUTF8NFD', 'normalizeUTF8NFKC', 'normalizeUTF8NFKD',
-    'lemmatize', 'tokenize', 'stem', 'synonyms' -- these functions are not enabled in fast test
+    'lemmatize', 'tokenize', 'stem', 'synonyms',
+    'detectCharset', 'detectLanguageUnknown', 'detectProgrammingLanguage', 'detectTonality'
+     -- these functions are not enabled in fast test
 ) ORDER BY name;

From 67f643f27e5930765d0b6881c415ffacf369c14f Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@clickhouse.com>
Date: Sun, 23 Jul 2023 21:00:28 +0200
Subject: [PATCH 204/242] Fix error

---
 contrib/cctz-cmake/CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/contrib/cctz-cmake/CMakeLists.txt b/contrib/cctz-cmake/CMakeLists.txt
index 7edeada6e59..fde31dd469d 100644
--- a/contrib/cctz-cmake/CMakeLists.txt
+++ b/contrib/cctz-cmake/CMakeLists.txt
@@ -55,7 +55,7 @@ foreach(TIMEZONE ${TIMEZONES})
     MATH(EXPR COUNTER "${COUNTER}+1")
 endforeach(TIMEZONE)
 
-file(APPEND ${TIMEZONES_FILE} "};\n\n")
+file(APPEND ${TIMEZONES_FILE} "    nullptr\n};\n\n")
 
 file(APPEND ${TIMEZONES_FILE} "#include <string_view>\n\n")
 file(APPEND ${TIMEZONES_FILE} "std::string_view getTimeZone(const char * name)\n{\n" )

From e02948580b31c61e32860da04f966a21231e14c7 Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Sun, 23 Jul 2023 22:38:59 +0200
Subject: [PATCH 205/242] Don't shutdown interserver before tables

---
 programs/server/Server.cpp | 91 +++++++++++++++++++++++++++++---------
 programs/server/Server.h   | 11 ++++-
 2 files changed, 79 insertions(+), 23 deletions(-)

diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp
index c7a7ba71e83..8c6e41d28c6 100644
--- a/programs/server/Server.cpp
+++ b/programs/server/Server.cpp
@@ -739,11 +739,12 @@ try
         [&]() -> std::vector<ProtocolServerMetrics>
         {
             std::vector<ProtocolServerMetrics> metrics;
-            metrics.reserve(servers_to_start_before_tables.size());
+
+            std::lock_guard lock(servers_lock);
+            metrics.reserve(servers_to_start_before_tables.size() + servers.size());
             for (const auto & server : servers_to_start_before_tables)
                 metrics.emplace_back(ProtocolServerMetrics{server.getPortName(), server.currentThreads()});
 
-            std::lock_guard lock(servers_lock);
             for (const auto & server : servers)
                 metrics.emplace_back(ProtocolServerMetrics{server.getPortName(), server.currentThreads()});
             return metrics;
@@ -1302,7 +1303,7 @@ try
                 global_context->reloadAuxiliaryZooKeepersConfigIfChanged(config);
 
                 std::lock_guard lock(servers_lock);
-                updateServers(*config, server_pool, async_metrics, servers);
+                updateServers(*config, server_pool, async_metrics, servers, servers_to_start_before_tables);
             }
 
             global_context->updateStorageConfiguration(*config);
@@ -1404,10 +1405,27 @@ try
 
     }
 
-    for (auto & server : servers_to_start_before_tables)
     {
-        server.start();
-        LOG_INFO(log, "Listening for {}", server.getDescription());
+        std::lock_guard lock(servers_lock);
+        /// We should start interserver communications before tables (and, more importantly, shut them down after tables).
+        /// Otherwise the server may wait for long-running queries (for example in tcp_handler) after the interserver handler was already shut down.
+        /// In that case we would have replicated tables which are unable to send any parts to other replicas, but still can
+        /// communicate with zookeeper, execute merges, etc.
+        createInterserverServers(
+            config(),
+            interserver_listen_hosts,
+            listen_try,
+            server_pool,
+            async_metrics,
+            servers_to_start_before_tables,
+            /* start_servers= */ false);
+
+
+        for (auto & server : servers_to_start_before_tables)
+        {
+            server.start();
+            LOG_INFO(log, "Listening for {}", server.getDescription());
+        }
     }
 
     /// Initialize access storages.
@@ -1527,10 +1545,13 @@ try
         {
             LOG_DEBUG(log, "Waiting for current connections to servers for tables to finish.");
             size_t current_connections = 0;
-            for (auto & server : servers_to_start_before_tables)
             {
-                server.stop();
-                current_connections += server.currentConnections();
+                std::lock_guard lock(servers_lock);
+                for (auto & server : servers_to_start_before_tables)
+                {
+                    server.stop();
+                    current_connections += server.currentConnections();
+                }
             }
 
             if (current_connections)
@@ -1709,7 +1730,7 @@ try
 
         {
             std::lock_guard lock(servers_lock);
-            createServers(config(), listen_hosts, interserver_listen_hosts, listen_try, server_pool, async_metrics, servers);
+            createServers(config(), listen_hosts, listen_try, server_pool, async_metrics, servers);
             if (servers.empty())
                 throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG,
                                 "No servers started (add valid listen_host and 'tcp_port' or 'http_port' "
@@ -1967,7 +1988,6 @@ HTTPContextPtr Server::httpContext() const
 void Server::createServers(
     Poco::Util::AbstractConfiguration & config,
     const Strings & listen_hosts,
-    const Strings & interserver_listen_hosts,
     bool listen_try,
     Poco::ThreadPool & server_pool,
     AsynchronousMetrics & async_metrics,
@@ -2189,6 +2209,23 @@ void Server::createServers(
                     httpContext(), createHandlerFactory(*this, config, async_metrics, "PrometheusHandler-factory"), server_pool, socket, http_params));
         });
     }
+}
+
+void Server::createInterserverServers(
+    Poco::Util::AbstractConfiguration & config,
+    const Strings & interserver_listen_hosts,
+    bool listen_try,
+    Poco::ThreadPool & server_pool,
+    AsynchronousMetrics & async_metrics,
+    std::vector<ProtocolServerAdapter> & servers,
+    bool start_servers)
+{
+    const Settings & settings = global_context->getSettingsRef();
+
+    Poco::Timespan keep_alive_timeout(config.getUInt("keep_alive_timeout", 10), 0);
+    Poco::Net::HTTPServerParams::Ptr http_params = new Poco::Net::HTTPServerParams;
+    http_params->setTimeout(settings.http_receive_timeout);
+    http_params->setKeepAliveTimeout(keep_alive_timeout);
 
     /// Now iterate over interserver_listen_hosts
     for (const auto & interserver_listen_host : interserver_listen_hosts)
@@ -2237,14 +2274,14 @@ void Server::createServers(
 #endif
         });
     }
-
 }
 
 void Server::updateServers(
     Poco::Util::AbstractConfiguration & config,
     Poco::ThreadPool & server_pool,
     AsynchronousMetrics & async_metrics,
-    std::vector<ProtocolServerAdapter> & servers)
+    std::vector<ProtocolServerAdapter> & servers,
+    std::vector<ProtocolServerAdapter> & servers_to_start_before_tables)
 {
     Poco::Logger * log = &logger();
 
@@ -2270,11 +2307,19 @@ void Server::updateServers(
 
     Poco::Util::AbstractConfiguration & previous_config = latest_config ? *latest_config : this->config();
 
+    std::vector<ProtocolServerAdapter *> all_servers;
+    all_servers.reserve(servers.size() + servers_to_start_before_tables.size());
     for (auto & server : servers)
+        all_servers.push_back(&server);
+
+    for (auto & server : servers_to_start_before_tables)
+        all_servers.push_back(&server);
+
+    for (auto * server : all_servers)
     {
-        if (!server.isStopping())
+        if (!server->isStopping())
         {
-            std::string port_name = server.getPortName();
+            std::string port_name = server->getPortName();
             bool has_host = false;
             bool is_http = false;
             if (port_name.starts_with("protocols."))
@@ -2312,27 +2357,29 @@ void Server::updateServers(
                 /// NOTE: better to compare using getPortName() over using
                 /// dynamic_cast<> since HTTPServer is also used for prometheus and
                 /// internal replication communications.
-                is_http = server.getPortName() == "http_port" || server.getPortName() == "https_port";
+                is_http = server->getPortName() == "http_port" || server->getPortName() == "https_port";
             }
 
             if (!has_host)
-                has_host = std::find(listen_hosts.begin(), listen_hosts.end(), server.getListenHost()) != listen_hosts.end();
+                has_host = std::find(listen_hosts.begin(), listen_hosts.end(), server->getListenHost()) != listen_hosts.end();
             bool has_port = !config.getString(port_name, "").empty();
             bool force_restart = is_http && !isSameConfiguration(previous_config, config, "http_handlers");
             if (force_restart)
-                LOG_TRACE(log, "<http_handlers> had been changed, will reload {}", server.getDescription());
+                LOG_TRACE(log, "<http_handlers> had been changed, will reload {}", server->getDescription());
 
-            if (!has_host || !has_port || config.getInt(server.getPortName()) != server.portNumber() || force_restart)
+            if (!has_host || !has_port || config.getInt(server->getPortName()) != server->portNumber() || force_restart)
             {
-                server.stop();
-                LOG_INFO(log, "Stopped listening for {}", server.getDescription());
+                server->stop();
+                LOG_INFO(log, "Stopped listening for {}", server->getDescription());
             }
         }
     }
 
-    createServers(config, listen_hosts, interserver_listen_hosts, listen_try, server_pool, async_metrics, servers, /* start_servers= */ true);
+    createServers(config, listen_hosts, listen_try, server_pool, async_metrics, servers, /* start_servers= */ true);
+    createInterserverServers(config, interserver_listen_hosts, listen_try, server_pool, async_metrics, servers_to_start_before_tables, /* start_servers= */ true);
 
     std::erase_if(servers, std::bind_front(check_server, ""));
+    std::erase_if(servers_to_start_before_tables, std::bind_front(check_server, ""));
 }
 
 }
diff --git a/programs/server/Server.h b/programs/server/Server.h
index e9ae6d8d937..d13378dcd65 100644
--- a/programs/server/Server.h
+++ b/programs/server/Server.h
@@ -102,6 +102,14 @@ private:
     void createServers(
         Poco::Util::AbstractConfiguration & config,
         const Strings & listen_hosts,
+        bool listen_try,
+        Poco::ThreadPool & server_pool,
+        AsynchronousMetrics & async_metrics,
+        std::vector<ProtocolServerAdapter> & servers,
+        bool start_servers = false);
+
+    void createInterserverServers(
+        Poco::Util::AbstractConfiguration & config,
         const Strings & interserver_listen_hosts,
         bool listen_try,
         Poco::ThreadPool & server_pool,
@@ -113,7 +121,8 @@ private:
         Poco::Util::AbstractConfiguration & config,
         Poco::ThreadPool & server_pool,
         AsynchronousMetrics & async_metrics,
-        std::vector<ProtocolServerAdapter> & servers);
+        std::vector<ProtocolServerAdapter> & servers,
+        std::vector<ProtocolServerAdapter> & servers_to_start_before_tables);
 };
 
 }

From c0f16dcf031b62e2eebdef249c132e9351203bc0 Mon Sep 17 00:00:00 2001
From: Igor Nikonov <igor@clickhouse.com>
Date: Sun, 23 Jul 2023 21:10:12 +0000
Subject: [PATCH 206/242] Test from fuzzer

---
 .../02831_ast_fuzz_asan_join.reference        |  0
 .../0_stateless/02831_ast_fuzz_asan_join.sql  | 22 +++++++++++++++++++
 2 files changed, 22 insertions(+)
 create mode 100644 tests/queries/0_stateless/02831_ast_fuzz_asan_join.reference
 create mode 100644 tests/queries/0_stateless/02831_ast_fuzz_asan_join.sql

diff --git a/tests/queries/0_stateless/02831_ast_fuzz_asan_join.reference b/tests/queries/0_stateless/02831_ast_fuzz_asan_join.reference
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/tests/queries/0_stateless/02831_ast_fuzz_asan_join.sql b/tests/queries/0_stateless/02831_ast_fuzz_asan_join.sql
new file mode 100644
index 00000000000..7c7bfd2df88
--- /dev/null
+++ b/tests/queries/0_stateless/02831_ast_fuzz_asan_join.sql
@@ -0,0 +1,22 @@
+SELECT
+    '0',
+    toTypeName(materialize(js2.s))
+FROM
+(
+    SELECT number AS k
+    FROM numbers(100)
+) AS js1
+FULL OUTER JOIN
+(
+    SELECT
+        toLowCardinality(2147483647 + 256) AS k,
+        '-0.0000000001',
+        1024,
+        toString(number + 10) AS s
+    FROM numbers(1024)
+) AS js2 ON js1.k = js2.k
+ORDER BY
+    inf DESC NULLS FIRST,
+    js1.k ASC NULLS LAST,
+    js2.k ASC
+FORMAT `Null`

From 1e467867e68c2c382f26291753bab45e2bc87a60 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@clickhouse.com>
Date: Mon, 24 Jul 2023 00:03:40 +0200
Subject: [PATCH 207/242] Attempt to fix LTO

---
 contrib/cctz-cmake/CMakeLists.txt                     |  3 +--
 programs/install/Install.cpp                          |  6 ++++--
 programs/server/Server.cpp                            |  2 +-
 src/Common/FrequencyHolder.cpp                        |  6 +++---
 src/Common/config.h.in                                |  4 ++++
 src/Server/WebUIRequestHandler.cpp                    |  8 +++++---
 src/Storages/System/attachInformationSchemaTables.cpp | 10 ++++++----
 src/configure_config.cmake                            |  2 ++
 8 files changed, 26 insertions(+), 15 deletions(-)

diff --git a/contrib/cctz-cmake/CMakeLists.txt b/contrib/cctz-cmake/CMakeLists.txt
index fde31dd469d..7161f743de1 100644
--- a/contrib/cctz-cmake/CMakeLists.txt
+++ b/contrib/cctz-cmake/CMakeLists.txt
@@ -44,7 +44,7 @@ file(APPEND ${TIMEZONES_FILE} "#include <incbin.h>\n")
 
 set (COUNTER 1)
 foreach(TIMEZONE ${TIMEZONES})
-    file(APPEND ${TIMEZONES_FILE} "INCBIN(resource_timezone${COUNTER}, \"${TIMEZONE}\");\n")
+    file(APPEND ${TIMEZONES_FILE} "INCBIN(resource_timezone${COUNTER}, \"${TZDIR}/${TIMEZONE}\");\n")
     MATH(EXPR COUNTER "${COUNTER}+1")
 endforeach(TIMEZONE)
 
@@ -71,7 +71,6 @@ file(APPEND ${TIMEZONES_FILE} "}\n")
 
 add_library (tzdata ${TIMEZONES_FILE})
 target_link_libraries(tzdata ch_contrib::incbin)
-target_include_directories(tzdata PRIVATE ${TZDIR})
 target_link_libraries(_cctz tzdata)
 
 add_library(ch_contrib::cctz ALIAS _cctz)
diff --git a/programs/install/Install.cpp b/programs/install/Install.cpp
index da2c95af62c..d7086c95beb 100644
--- a/programs/install/Install.cpp
+++ b/programs/install/Install.cpp
@@ -34,9 +34,11 @@
 
 #include <incbin.h>
 
+#include "config.h"
+
 /// Embedded configuration files used inside the install program
-INCBIN(resource_config_xml, "config.xml");
-INCBIN(resource_users_xml, "users.xml");
+INCBIN(resource_config_xml, SOURCE_DIR "/programs/server/config.xml");
+INCBIN(resource_users_xml, SOURCE_DIR "/programs/server/users.xml");
 
 
 /** This tool can be used to install ClickHouse without a deb/rpm/tgz package, having only "clickhouse" binary.
diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp
index 229a169dc1e..2ab89ad048a 100644
--- a/programs/server/Server.cpp
+++ b/programs/server/Server.cpp
@@ -130,7 +130,7 @@
 
 #include <incbin.h>
 /// A minimal file used when the server is run without installation
-INCBIN(resource_embedded_xml, "embedded.xml");
+INCBIN(resource_embedded_xml, SOURCE_DIR "/programs/server/embedded.xml");
 
 namespace CurrentMetrics
 {
diff --git a/src/Common/FrequencyHolder.cpp b/src/Common/FrequencyHolder.cpp
index fe03e6a1b44..7dc1f622aeb 100644
--- a/src/Common/FrequencyHolder.cpp
+++ b/src/Common/FrequencyHolder.cpp
@@ -5,9 +5,9 @@
 #include <incbin.h>
 
 /// Embedded SQL definitions
-INCBIN(resource_charset_zst, "charset.zst");
-INCBIN(resource_tonality_ru_zst, "tonality_ru.zst");
-INCBIN(resource_programming_zst, "programming.zst");
+INCBIN(resource_charset_zst, SOURCE_DIR "/contrib/nlp-data/charset.zst");
+INCBIN(resource_tonality_ru_zst, SOURCE_DIR "/contrib/nlp-data/tonality_ru.zst");
+INCBIN(resource_programming_zst, SOURCE_DIR "/contrib/nlp-data/programming.zst");
 
 
 namespace DB
diff --git a/src/Common/config.h.in b/src/Common/config.h.in
index a2c18fc330f..628f0847d65 100644
--- a/src/Common/config.h.in
+++ b/src/Common/config.h.in
@@ -59,3 +59,7 @@
 #cmakedefine01 USE_ULID
 #cmakedefine01 FIU_ENABLE
 #cmakedefine01 USE_BCRYPT
+
+/// This is needed for .incbin in assembly. For some reason, include paths don't work there in the presence of LTO.
+/// That's why we use absolute paths.
+#cmakedefine SOURCE_DIR "@SOURCE_DIR@"
diff --git a/src/Server/WebUIRequestHandler.cpp b/src/Server/WebUIRequestHandler.cpp
index cb9e8935d8c..6fa1d65de42 100644
--- a/src/Server/WebUIRequestHandler.cpp
+++ b/src/Server/WebUIRequestHandler.cpp
@@ -11,10 +11,12 @@
 
 #include <incbin.h>
 
+#include "config.h"
+
 /// Embedded HTML pages
-INCBIN(resource_play_html, "play.html");
-INCBIN(resource_dashboard_html, "dashboard.html");
-INCBIN(resource_uplot_js, "js/uplot.js");
+INCBIN(resource_play_html, SOURCE_DIR "/programs/server/play.html");
+INCBIN(resource_dashboard_html, SOURCE_DIR "/programs/server/dashboard.html");
+INCBIN(resource_uplot_js, SOURCE_DIR "/programs/server/js/uplot.js");
 
 
 namespace DB
diff --git a/src/Storages/System/attachInformationSchemaTables.cpp b/src/Storages/System/attachInformationSchemaTables.cpp
index bfc5c8c64e2..12cef89b553 100644
--- a/src/Storages/System/attachInformationSchemaTables.cpp
+++ b/src/Storages/System/attachInformationSchemaTables.cpp
@@ -5,11 +5,13 @@
 #include <Parsers/parseQuery.h>
 #include <incbin.h>
 
+#include "config.h"
+
 /// Embedded SQL definitions
-INCBIN(resource_schemata_sql, "schemata.sql");
-INCBIN(resource_tables_sql, "tables.sql");
-INCBIN(resource_views_sql, "views.sql");
-INCBIN(resource_columns_sql, "columns.sql");
+INCBIN(resource_schemata_sql, SOURCE_DIR "/src/Storages/System/InformationSchema/schemata.sql");
+INCBIN(resource_tables_sql, SOURCE_DIR "/src/Storages/System/InformationSchema/tables.sql");
+INCBIN(resource_views_sql, SOURCE_DIR "/src/Storages/System/InformationSchema/views.sql");
+INCBIN(resource_columns_sql, SOURCE_DIR "/src/Storages/System/InformationSchema/columns.sql");
 
 
 namespace DB
diff --git a/src/configure_config.cmake b/src/configure_config.cmake
index ae6305705c2..5529e2f2f39 100644
--- a/src/configure_config.cmake
+++ b/src/configure_config.cmake
@@ -162,3 +162,5 @@ endif ()
 if (TARGET ch_contrib::fiu)
     set(FIU_ENABLE 1)
 endif()
+
+set(SOURCE_DIR ${CMAKE_SOURCE_DIR})

From 7b4d0cf9d5b261eb68bd1db4021fcc350b907fc1 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@clickhouse.com>
Date: Mon, 24 Jul 2023 00:51:20 +0200
Subject: [PATCH 208/242] Fix Darwin

---
 contrib/incbin-cmake/CMakeLists.txt | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/contrib/incbin-cmake/CMakeLists.txt b/contrib/incbin-cmake/CMakeLists.txt
index e64ebc99c73..8f4dad7e0d9 100644
--- a/contrib/incbin-cmake/CMakeLists.txt
+++ b/contrib/incbin-cmake/CMakeLists.txt
@@ -2,3 +2,7 @@ set(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/incbin")
 add_library(_incbin INTERFACE)
 target_include_directories(_incbin SYSTEM INTERFACE ${LIBRARY_DIR})
 add_library(ch_contrib::incbin ALIAS _incbin)
+
+# Warning "incbin is incompatible with bitcode. Using the library will break upload to App Store if you have bitcode enabled.
+# Add `#define INCBIN_SILENCE_BITCODE_WARNING` before including this header to silence this warning."
+target_compile_definitions(_inclin PUBLIC INCBIN_SILENCE_BITCODE_WARNING)

From 641c086dbd771c14cc7db089e265ec508da9ccff Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@clickhouse.com>
Date: Mon, 24 Jul 2023 00:53:11 +0200
Subject: [PATCH 209/242] Fix Darwin

---
 contrib/incbin-cmake/CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/contrib/incbin-cmake/CMakeLists.txt b/contrib/incbin-cmake/CMakeLists.txt
index 8f4dad7e0d9..5778cf83c22 100644
--- a/contrib/incbin-cmake/CMakeLists.txt
+++ b/contrib/incbin-cmake/CMakeLists.txt
@@ -5,4 +5,4 @@ add_library(ch_contrib::incbin ALIAS _incbin)
 
 # Warning "incbin is incompatible with bitcode. Using the library will break upload to App Store if you have bitcode enabled.
 # Add `#define INCBIN_SILENCE_BITCODE_WARNING` before including this header to silence this warning."
-target_compile_definitions(_inclin PUBLIC INCBIN_SILENCE_BITCODE_WARNING)
+target_compile_definitions(_incbin INTERFACE INCBIN_SILENCE_BITCODE_WARNING)

From 40f5649811bb579b3cf8d634281f862675934773 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@clickhouse.com>
Date: Mon, 24 Jul 2023 01:02:11 +0200
Subject: [PATCH 210/242] Fix test

---
 .../no_allow_vertical_merges_from_compact_to_wide_parts.xml  | 5 +++++
 .../test_vertical_merges_from_compact_parts.py               | 2 +-
 2 files changed, 6 insertions(+), 1 deletion(-)
 create mode 100644 tests/integration/test_backward_compatibility/configs/no_allow_vertical_merges_from_compact_to_wide_parts.xml

diff --git a/tests/integration/test_backward_compatibility/configs/no_allow_vertical_merges_from_compact_to_wide_parts.xml b/tests/integration/test_backward_compatibility/configs/no_allow_vertical_merges_from_compact_to_wide_parts.xml
new file mode 100644
index 00000000000..c69be846c46
--- /dev/null
+++ b/tests/integration/test_backward_compatibility/configs/no_allow_vertical_merges_from_compact_to_wide_parts.xml
@@ -0,0 +1,5 @@
+<clickhouse>
+    <merge_tree>
+        <allow_vertical_merges_from_compact_to_wide_parts>0</allow_vertical_merges_from_compact_to_wide_parts>
+    </merge_tree>
+</clickhouse>
diff --git a/tests/integration/test_backward_compatibility/test_vertical_merges_from_compact_parts.py b/tests/integration/test_backward_compatibility/test_vertical_merges_from_compact_parts.py
index 82ffcc20b60..481621cacfe 100644
--- a/tests/integration/test_backward_compatibility/test_vertical_merges_from_compact_parts.py
+++ b/tests/integration/test_backward_compatibility/test_vertical_merges_from_compact_parts.py
@@ -15,7 +15,7 @@ node_old = cluster.add_instance(
 )
 node_new = cluster.add_instance(
     "node2",
-    main_configs=["configs/no_compress_marks.xml"],
+    main_configs=["configs/no_compress_marks.xml", "configs/no_allow_vertical_merges_from_compact_to_wide_parts.xml"],
     with_zookeeper=True,
     stay_alive=True,
     allow_analyzer=False,

From dba7a0dffc4927a88c04cb7b9ec93faeeba40b3c Mon Sep 17 00:00:00 2001
From: robot-clickhouse <robot-clickhouse@users.noreply.github.com>
Date: Sun, 23 Jul 2023 23:18:19 +0000
Subject: [PATCH 211/242] Automatic style fix

---
 .../test_vertical_merges_from_compact_parts.py               | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/tests/integration/test_backward_compatibility/test_vertical_merges_from_compact_parts.py b/tests/integration/test_backward_compatibility/test_vertical_merges_from_compact_parts.py
index 481621cacfe..9c9d1a4d312 100644
--- a/tests/integration/test_backward_compatibility/test_vertical_merges_from_compact_parts.py
+++ b/tests/integration/test_backward_compatibility/test_vertical_merges_from_compact_parts.py
@@ -15,7 +15,10 @@ node_old = cluster.add_instance(
 )
 node_new = cluster.add_instance(
     "node2",
-    main_configs=["configs/no_compress_marks.xml", "configs/no_allow_vertical_merges_from_compact_to_wide_parts.xml"],
+    main_configs=[
+        "configs/no_compress_marks.xml",
+        "configs/no_allow_vertical_merges_from_compact_to_wide_parts.xml",
+    ],
     with_zookeeper=True,
     stay_alive=True,
     allow_analyzer=False,

From d7cdfb47d3795a3a09c2a204789c95e9726dc2b6 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@clickhouse.com>
Date: Mon, 24 Jul 2023 01:56:04 +0200
Subject: [PATCH 212/242] Fix merge

---
 src/IO/WriteHelpers.h | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/src/IO/WriteHelpers.h b/src/IO/WriteHelpers.h
index 0494cdf22e7..76778543bd0 100644
--- a/src/IO/WriteHelpers.h
+++ b/src/IO/WriteHelpers.h
@@ -953,6 +953,11 @@ void writeDecimalFractional(const T & x, UInt32 scale, WriteBuffer & ostr, bool
     {
         auto remainder = value % 10;
         value /= 10;
+
+        if (remainder != 0 && last_nonzero_pos == 0)
+            last_nonzero_pos = pos;
+
+        buf[pos] += static_cast<char>(remainder);
     }
 
     writeChar('.', ostr);

From 75efee9675f277fc3405ca5b256296aa406baca4 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@clickhouse.com>
Date: Mon, 24 Jul 2023 05:34:00 +0200
Subject: [PATCH 213/242] Fix errors

---
 programs/install/CMakeLists.txt | 3 ---
 programs/server/CMakeLists.txt  | 2 +-
 src/CMakeLists.txt              | 3 +--
 3 files changed, 2 insertions(+), 6 deletions(-)

diff --git a/programs/install/CMakeLists.txt b/programs/install/CMakeLists.txt
index f3f562bab7c..c3f4d96d631 100644
--- a/programs/install/CMakeLists.txt
+++ b/programs/install/CMakeLists.txt
@@ -10,6 +10,3 @@ set (CLICKHOUSE_INSTALL_LINK
 )
 
 clickhouse_program_add_library(install)
-
-# For incbin
-target_include_directories(clickhouse-install-lib PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/../server")
diff --git a/programs/server/CMakeLists.txt b/programs/server/CMakeLists.txt
index e008e65acf6..b8241afa1eb 100644
--- a/programs/server/CMakeLists.txt
+++ b/programs/server/CMakeLists.txt
@@ -29,4 +29,4 @@ endif()
 
 clickhouse_program_add(server)
 
-target_include_directories(clickhouse-server-lib PRIVATE ${CMAKE_CURRENT_SOURCE_DIR})
+install(FILES config.xml users.xml DESTINATION "${CLICKHOUSE_ETC_DIR}/clickhouse-server" COMPONENT clickhouse)
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index fda8bafde59..975bf9bb618 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -296,7 +296,7 @@ macro (dbms_target_include_directories)
     endforeach ()
 endmacro ()
 
-dbms_target_include_directories (PUBLIC "${ClickHouse_SOURCE_DIR}/src" "${ClickHouse_BINARY_DIR}/src" "${ClickHouse_SOURCE_DIR}/programs/server")
+dbms_target_include_directories (PUBLIC "${ClickHouse_SOURCE_DIR}/src" "${ClickHouse_BINARY_DIR}/src")
 target_include_directories (clickhouse_common_io PUBLIC "${ClickHouse_SOURCE_DIR}/src" "${ClickHouse_BINARY_DIR}/src")
 
 if (TARGET ch_contrib::llvm)
@@ -561,7 +561,6 @@ if (ENABLE_NLP)
     dbms_target_link_libraries (PUBLIC ch_contrib::stemmer)
     dbms_target_link_libraries (PUBLIC ch_contrib::wnb)
     dbms_target_link_libraries (PUBLIC ch_contrib::lemmagen)
-    target_include_directories(clickhouse_common_io PUBLIC ${CMAKE_SOURCE_DIR}/contrib/nlp-data)
 endif()
 
 if (TARGET ch_contrib::ulid)

From 169b9d5cc0c8dc54d31bc7229204b195f294c877 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@clickhouse.com>
Date: Mon, 24 Jul 2023 05:49:06 +0200
Subject: [PATCH 214/242] Fix tidy

---
 src/Functions/GregorianDate.cpp | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/src/Functions/GregorianDate.cpp b/src/Functions/GregorianDate.cpp
index da1172c8916..aaaeeb7339d 100644
--- a/src/Functions/GregorianDate.cpp
+++ b/src/Functions/GregorianDate.cpp
@@ -20,12 +20,12 @@ namespace ErrorCodes
 
 namespace
 {
-    static inline constexpr bool is_leap_year(int32_t year)
+    inline constexpr bool is_leap_year(int32_t year)
     {
         return (year % 4 == 0) && ((year % 400 == 0) || (year % 100 != 0));
     }
 
-    static inline constexpr uint8_t monthLength(bool is_leap_year, uint8_t month)
+    inline constexpr uint8_t monthLength(bool is_leap_year, uint8_t month)
     {
         switch (month)
         {
@@ -49,7 +49,7 @@ namespace
     /** Integer division truncated toward negative infinity.
       */
     template <typename I, typename J>
-    static inline constexpr I div(I x, J y)
+    inline constexpr I div(I x, J y)
     {
         const auto y_cast = static_cast<I>(y);
         if (x > 0 && y_cast < 0)
@@ -63,7 +63,7 @@ namespace
     /** Integer modulus, satisfying div(x, y)*y + mod(x, y) == x.
       */
     template <typename I, typename J>
-    static inline constexpr I mod(I x, J y)
+    inline constexpr I mod(I x, J y)
     {
         const auto y_cast = static_cast<I>(y);
         const auto r = x % y_cast;
@@ -76,13 +76,13 @@ namespace
     /** Like std::min(), but the type of operands may differ.
       */
     template <typename I, typename J>
-    static inline constexpr I min(I x, J y)
+    inline constexpr I min(I x, J y)
     {
         const auto y_cast = static_cast<I>(y);
         return x < y_cast ? x : y_cast;
     }
 
-    static inline char readDigit(ReadBuffer & in)
+    inline char readDigit(ReadBuffer & in)
     {
         char c;
         if (!in.read(c))
@@ -93,7 +93,7 @@ namespace
             return c - '0';
     }
 
-    static inline bool tryReadDigit(ReadBuffer & in, char & c)
+    inline bool tryReadDigit(ReadBuffer & in, char & c)
     {
         if (in.read(c) && c >= '0' && c <= '9')
         {

From d7f7f16fbcfa8063e295708b4feb3b0079ad05f0 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Fri, 23 Jun 2023 10:44:13 +0200
Subject: [PATCH 215/242] Introduce
 IStorage::supportsTrivialCountOptimization()

Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com>
---
 src/Interpreters/InterpreterSelectQuery.cpp | 3 +--
 src/Planner/PlannerJoinTree.cpp             | 3 +++
 src/Storages/IStorage.h                     | 3 +++
 src/Storages/MergeTree/MergeTreeData.h      | 2 ++
 src/Storages/StorageMaterializedMySQL.h     | 2 ++
 5 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp
index d07a6521544..fc3ea3a13ca 100644
--- a/src/Interpreters/InterpreterSelectQuery.cpp
+++ b/src/Interpreters/InterpreterSelectQuery.cpp
@@ -2274,8 +2274,7 @@ std::optional<UInt64> InterpreterSelectQuery::getTrivialCount(UInt64 max_paralle
         && !settings.allow_experimental_query_deduplication
         && !settings.empty_result_for_aggregation_by_empty_set
         && storage
-        && storage->getName() != "MaterializedMySQL"
-        && !storage->hasLightweightDeletedMask()
+        && storage->supportsTrivialCountOptimization()
         && query_info.filter_asts.empty()
         && query_analyzer->hasAggregation()
         && (query_analyzer->aggregates().size() == 1)
diff --git a/src/Planner/PlannerJoinTree.cpp b/src/Planner/PlannerJoinTree.cpp
index 5d8f8ca8741..c118fccded4 100644
--- a/src/Planner/PlannerJoinTree.cpp
+++ b/src/Planner/PlannerJoinTree.cpp
@@ -182,6 +182,9 @@ bool applyTrivialCountIfPossible(
         return false;
 
     const auto & storage = table_node.getStorage();
+    if (!storage->supportsTrivialCountOptimization())
+        return false;
+
     auto storage_id = storage->getStorageID();
     auto row_policy_filter = query_context->getRowPolicyFilter(storage_id.getDatabaseName(),
         storage_id.getTableName(),
diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h
index 76641b656a2..701e02a85ac 100644
--- a/src/Storages/IStorage.h
+++ b/src/Storages/IStorage.h
@@ -254,6 +254,9 @@ public:
     /// because those are internally translated into 'ALTER UDPATE' mutations.
     virtual bool supportsDelete() const { return false; }
 
+    /// Return true if the trivial count query could be optimized without reading the data at all.
+    virtual bool supportsTrivialCountOptimization() const { return false; }
+
 private:
 
     StorageID storage_id;
diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h
index 41fc4657854..5e6b043c31c 100644
--- a/src/Storages/MergeTree/MergeTreeData.h
+++ b/src/Storages/MergeTree/MergeTreeData.h
@@ -434,6 +434,8 @@ public:
 
     bool areAsynchronousInsertsEnabled() const override { return getSettings()->async_insert; }
 
+    bool supportsTrivialCountOptimization() const override { return !hasLightweightDeletedMask(); }
+
     NamesAndTypesList getVirtuals() const override;
 
     bool mayBenefitFromIndexForIn(const ASTPtr & left_in_operand, ContextPtr, const StorageMetadataPtr & metadata_snapshot) const override;
diff --git a/src/Storages/StorageMaterializedMySQL.h b/src/Storages/StorageMaterializedMySQL.h
index 08fbb61960f..e6fcbc203e6 100644
--- a/src/Storages/StorageMaterializedMySQL.h
+++ b/src/Storages/StorageMaterializedMySQL.h
@@ -41,6 +41,8 @@ public:
 
     void drop() override { nested_storage->drop(); }
 
+    bool supportsTrivialCountOptimization() const override { return false; }
+
 private:
     [[noreturn]] static void throwNotAllowed()
     {

From a0070eda02736903b984518daf3d1c79bfe5fd94 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Fri, 23 Jun 2023 10:48:21 +0200
Subject: [PATCH 216/242] Slightly optimize code in
 ClusterProxy::executeQuery()

Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com>
---
 src/Interpreters/ClusterProxy/executeQuery.cpp | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/src/Interpreters/ClusterProxy/executeQuery.cpp b/src/Interpreters/ClusterProxy/executeQuery.cpp
index 3dea52faf46..5efba383e4b 100644
--- a/src/Interpreters/ClusterProxy/executeQuery.cpp
+++ b/src/Interpreters/ClusterProxy/executeQuery.cpp
@@ -176,11 +176,9 @@ void executeQuery(
     size_t shards = query_info.getCluster()->getShardCount();
     for (const auto & shard_info : query_info.getCluster()->getShardsInfo())
     {
-        ASTPtr query_ast_for_shard;
-        if (query_info.optimized_cluster && settings.optimize_skip_unused_shards_rewrite_in && shards > 1)
+        ASTPtr query_ast_for_shard = query_ast->clone();
+        if (sharding_key_expr && query_info.optimized_cluster && settings.optimize_skip_unused_shards_rewrite_in && shards > 1)
         {
-            query_ast_for_shard = query_ast->clone();
-
             OptimizeShardingKeyRewriteInVisitor::Data visitor_data{
                 sharding_key_expr,
                 sharding_key_expr->getSampleBlock().getByPosition(0).type,
@@ -191,8 +189,6 @@ void executeQuery(
             OptimizeShardingKeyRewriteInVisitor visitor(visitor_data);
             visitor.visit(query_ast_for_shard);
         }
-        else
-            query_ast_for_shard = query_ast->clone();
 
         if (shard_filter_generator)
         {

From 67095d2150cafc91c0eebea4a17a8dc5f17b307c Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Fri, 23 Jun 2023 10:48:58 +0200
Subject: [PATCH 217/242] Fix comment for function argument in
 TableFunctionRemote

Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com>
---
 src/TableFunctions/TableFunctionRemote.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/TableFunctions/TableFunctionRemote.cpp b/src/TableFunctions/TableFunctionRemote.cpp
index 4143014a7b3..e6d72ddf17b 100644
--- a/src/TableFunctions/TableFunctionRemote.cpp
+++ b/src/TableFunctions/TableFunctionRemote.cpp
@@ -264,7 +264,7 @@ void TableFunctionRemote::parseArguments(const ASTPtr & ast_function, ContextPtr
             secure,
             /* priority= */ Priority{1},
             /* cluster_name= */ "",
-            /* password= */ ""
+            /* cluster_secret= */ ""
         };
         cluster = std::make_shared<Cluster>(context->getSettingsRef(), names, params);
     }

From b22247609036020e9bc4da64f1a297e49c29edfa Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Fri, 23 Jun 2023 14:19:08 +0200
Subject: [PATCH 218/242] Add ability to pass table for connections checks
 per-shard to ReadFromRemote

Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com>
---
 src/Interpreters/ClusterProxy/SelectStreamFactory.cpp | 1 +
 src/Interpreters/ClusterProxy/SelectStreamFactory.h   | 2 ++
 src/Processors/QueryPlan/ReadFromRemote.cpp           | 6 ++++--
 src/Processors/QueryPlan/ReadFromRemote.h             | 1 +
 4 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp b/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp
index 0cf3f360994..953e38d56cd 100644
--- a/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp
+++ b/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp
@@ -124,6 +124,7 @@ void SelectStreamFactory::createForShard(
     {
         remote_shards.emplace_back(Shard{
             .query = query_ast,
+            .main_table = main_table,
             .header = header,
             .shard_info = shard_info,
             .lazy = lazy,
diff --git a/src/Interpreters/ClusterProxy/SelectStreamFactory.h b/src/Interpreters/ClusterProxy/SelectStreamFactory.h
index 030c0b77dd5..1cc5a3b1a77 100644
--- a/src/Interpreters/ClusterProxy/SelectStreamFactory.h
+++ b/src/Interpreters/ClusterProxy/SelectStreamFactory.h
@@ -50,6 +50,8 @@ public:
     {
         /// Query and header may be changed depending on shard.
         ASTPtr query;
+        /// Table whose existence is checked on the remote node; when not set, ReadFromRemote falls back to its own main_table.
+        StorageID main_table;
         Block header;
 
         Cluster::ShardInfo shard_info;
diff --git a/src/Processors/QueryPlan/ReadFromRemote.cpp b/src/Processors/QueryPlan/ReadFromRemote.cpp
index 5cc13f45df4..7a99c363232 100644
--- a/src/Processors/QueryPlan/ReadFromRemote.cpp
+++ b/src/Processors/QueryPlan/ReadFromRemote.cpp
@@ -162,7 +162,9 @@ void ReadFromRemote::addLazyPipe(Pipes & pipes, const ClusterProxy::SelectStream
             if (my_table_func_ptr)
                 try_results = my_shard.shard_info.pool->getManyForTableFunction(timeouts, &current_settings, PoolMode::GET_MANY);
             else
-                try_results = my_shard.shard_info.pool->getManyChecked(timeouts, &current_settings, PoolMode::GET_MANY, my_main_table.getQualifiedName());
+                try_results = my_shard.shard_info.pool->getManyChecked(
+                    timeouts, &current_settings, PoolMode::GET_MANY,
+                    my_shard.main_table ? my_shard.main_table.getQualifiedName() : my_main_table.getQualifiedName());
         }
         catch (const Exception & ex)
         {
@@ -241,7 +243,7 @@ void ReadFromRemote::addPipe(Pipes & pipes, const ClusterProxy::SelectStreamFact
     remote_query_executor->setPoolMode(PoolMode::GET_MANY);
 
     if (!table_func_ptr)
-        remote_query_executor->setMainTable(main_table);
+        remote_query_executor->setMainTable(shard.main_table ? shard.main_table : main_table);
 
     pipes.emplace_back(createRemoteSourcePipe(remote_query_executor, add_agg_info, add_totals, add_extremes, async_read, async_query_sending));
     addConvertingActions(pipes.back(), output_stream->header);
diff --git a/src/Processors/QueryPlan/ReadFromRemote.h b/src/Processors/QueryPlan/ReadFromRemote.h
index d4005d81f1b..ac869cd89f9 100644
--- a/src/Processors/QueryPlan/ReadFromRemote.h
+++ b/src/Processors/QueryPlan/ReadFromRemote.h
@@ -22,6 +22,7 @@ using ThrottlerPtr = std::shared_ptr<Throttler>;
 class ReadFromRemote final : public ISourceStep
 {
 public:
+    /// @param main_table_ fallback used only for shards that do not carry their own main_table (a shard's main_table takes precedence)
     ReadFromRemote(
         ClusterProxy::SelectStreamFactory::Shards shards_,
         Block header_,

From 83c0f03b98d6b3cbd10f9690256aed2fada47177 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Fri, 23 Jun 2023 14:21:53 +0200
Subject: [PATCH 219/242] Change signature of the updateSettingsForCluster() to
 avoid cluster requirement

Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com>
---
 src/Interpreters/ClusterProxy/executeQuery.cpp | 11 ++++++++---
 src/Interpreters/ClusterProxy/executeQuery.h   |  8 ++++++--
 src/Storages/getStructureOfRemoteTable.cpp     |  4 ++--
 3 files changed, 16 insertions(+), 7 deletions(-)

diff --git a/src/Interpreters/ClusterProxy/executeQuery.cpp b/src/Interpreters/ClusterProxy/executeQuery.cpp
index 5efba383e4b..2fed626ffb7 100644
--- a/src/Interpreters/ClusterProxy/executeQuery.cpp
+++ b/src/Interpreters/ClusterProxy/executeQuery.cpp
@@ -35,7 +35,12 @@ namespace ErrorCodes
 namespace ClusterProxy
 {
 
-ContextMutablePtr updateSettingsForCluster(const Cluster & cluster, ContextPtr context, const Settings & settings, const StorageID & main_table, const SelectQueryInfo * query_info, Poco::Logger * log)
+ContextMutablePtr updateSettingsForCluster(bool interserver_mode,
+    ContextPtr context,
+    const Settings & settings,
+    const StorageID & main_table,
+    const SelectQueryInfo * query_info,
+    Poco::Logger * log)
 {
     Settings new_settings = settings;
     new_settings.queue_max_wait_ms = Cluster::saturate(new_settings.queue_max_wait_ms, settings.max_execution_time);
@@ -43,7 +48,7 @@ ContextMutablePtr updateSettingsForCluster(const Cluster & cluster, ContextPtr c
     /// If "secret" (in remote_servers) is not in use,
     /// user on the shard is not the same as the user on the initiator,
     /// hence per-user limits should not be applied.
-    if (cluster.getSecret().empty())
+    if (!interserver_mode)
     {
         /// Does not matter on remote servers, because queries are sent under different user.
         new_settings.max_concurrent_queries_for_user = 0;
@@ -170,7 +175,7 @@ void executeQuery(
     std::vector<QueryPlanPtr> plans;
     SelectStreamFactory::Shards remote_shards;
 
-    auto new_context = updateSettingsForCluster(*query_info.getCluster(), context, settings, main_table, &query_info, log);
+    auto new_context = updateSettingsForCluster(!query_info.getCluster()->getSecret().empty(), context, settings, main_table, &query_info, log);
     new_context->increaseDistributedDepth();
 
     size_t shards = query_info.getCluster()->getShardCount();
diff --git a/src/Interpreters/ClusterProxy/executeQuery.h b/src/Interpreters/ClusterProxy/executeQuery.h
index 41f6da55686..511914e99e4 100644
--- a/src/Interpreters/ClusterProxy/executeQuery.h
+++ b/src/Interpreters/ClusterProxy/executeQuery.h
@@ -34,8 +34,12 @@ class SelectStreamFactory;
 ///   - optimize_skip_unused_shards_nesting
 ///
 /// @return new Context with adjusted settings
-ContextMutablePtr updateSettingsForCluster(
-    const Cluster & cluster, ContextPtr context, const Settings & settings, const StorageID & main_table, const SelectQueryInfo * query_info = nullptr, Poco::Logger * log = nullptr);
+ContextMutablePtr updateSettingsForCluster(bool interserver_mode,
+    ContextPtr context,
+    const Settings & settings,
+    const StorageID & main_table,
+    const SelectQueryInfo * query_info = nullptr,
+    Poco::Logger * log = nullptr);
 
 using AdditionalShardFilterGenerator = std::function<ASTPtr(uint64_t)>;
 /// Execute a distributed query, creating a query plan, from which the query pipeline can be built.
diff --git a/src/Storages/getStructureOfRemoteTable.cpp b/src/Storages/getStructureOfRemoteTable.cpp
index e5fc01be9f4..cbed05e30ed 100644
--- a/src/Storages/getStructureOfRemoteTable.cpp
+++ b/src/Storages/getStructureOfRemoteTable.cpp
@@ -58,7 +58,7 @@ ColumnsDescription getStructureOfRemoteTableInShard(
     }
 
     ColumnsDescription res;
-    auto new_context = ClusterProxy::updateSettingsForCluster(cluster, context, context->getSettingsRef(), table_id);
+    auto new_context = ClusterProxy::updateSettingsForCluster(!cluster.getSecret().empty(), context, context->getSettingsRef(), table_id);
 
     /// Ignore limit for result number of rows (that could be set during handling CSE/CTE),
     /// since this is a service query and should not lead to query failure.
@@ -177,7 +177,7 @@ ColumnsDescriptionByShardNum getExtendedObjectsOfRemoteTables(
     const auto & shards_info = cluster.getShardsInfo();
     auto query = "DESC TABLE " + remote_table_id.getFullTableName();
 
-    auto new_context = ClusterProxy::updateSettingsForCluster(cluster, context, context->getSettingsRef(), remote_table_id);
+    auto new_context = ClusterProxy::updateSettingsForCluster(!cluster.getSecret().empty(), context, context->getSettingsRef(), remote_table_id);
     new_context->setSetting("describe_extend_object_types", true);
 
     /// Expect only needed columns from the result of DESC TABLE.

From 323128df6f3c779f3b2fe4a751fa98372a54fbbb Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Fri, 23 Jun 2023 15:02:32 +0200
Subject: [PATCH 220/242] Remove non existing ctor of Cluster::Address

Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com>
---
 src/Interpreters/Cluster.h | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/src/Interpreters/Cluster.h b/src/Interpreters/Cluster.h
index de10a445d01..b90acd1d576 100644
--- a/src/Interpreters/Cluster.h
+++ b/src/Interpreters/Cluster.h
@@ -144,12 +144,6 @@ public:
             UInt32 shard_index_ = 0,
             UInt32 replica_index_ = 0);
 
-        Address(
-            const String & host_port_,
-            const ClusterConnectionParameters & params,
-            UInt32 shard_index_,
-            UInt32 replica_index_);
-
         Address(
             const DatabaseReplicaInfo & info,
             const ClusterConnectionParameters & params,

From 4a33e027c518f51d120c60b21ccd962264e1356a Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Tue, 20 Jun 2023 17:31:45 +0200
Subject: [PATCH 221/242] Split StorageReplicatedMergeTree reading methods

Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com>
---
 src/Storages/StorageReplicatedMergeTree.cpp | 141 ++++++++++++--------
 src/Storages/StorageReplicatedMergeTree.h   |  32 ++++-
 2 files changed, 119 insertions(+), 54 deletions(-)

diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp
index 13c0fb3f7c2..4e053c4598c 100644
--- a/src/Storages/StorageReplicatedMergeTree.cpp
+++ b/src/Storages/StorageReplicatedMergeTree.cpp
@@ -4902,67 +4902,102 @@ void StorageReplicatedMergeTree::read(
         snapshot_data.alter_conversions = {};
     });
 
-    /** The `select_sequential_consistency` setting has two meanings:
-    * 1. To throw an exception if on a replica there are not all parts which have been written down on quorum of remaining replicas.
-    * 2. Do not read parts that have not yet been written to the quorum of the replicas.
-    * For this you have to synchronously go to ZooKeeper.
-    */
-    if (local_context->getSettingsRef().select_sequential_consistency)
-    {
-        auto max_added_blocks = std::make_shared<ReplicatedMergeTreeQuorumAddedParts::PartitionIdToMaxBlock>(getMaxAddedBlocks());
-        if (auto plan = reader.read(
-                column_names, storage_snapshot, query_info, local_context,
-                max_block_size, num_streams, processed_stage, std::move(max_added_blocks), /*enable_parallel_reading*/false))
-            query_plan = std::move(*plan);
-        return;
-    }
+    const auto & settings = local_context->getSettingsRef();
+
+    /// The `select_sequential_consistency` setting has two meanings:
+    /// 1. To throw an exception if on a replica there are not all parts which have been written down on quorum of remaining replicas.
+    /// 2. Do not read parts that have not yet been written to the quorum of the replicas.
+    /// For this you have to synchronously go to ZooKeeper.
+    if (settings.select_sequential_consistency)
+        return readLocalSequentialConsistencyImpl(query_plan, column_names, storage_snapshot, query_info, local_context, processed_stage, max_block_size, num_streams);
 
     if (local_context->canUseParallelReplicasOnInitiator())
+        return readParallelReplicasImpl(query_plan, column_names, storage_snapshot, query_info, local_context, processed_stage, max_block_size, num_streams);
+
+    readLocalImpl(query_plan, column_names, storage_snapshot, query_info, local_context, processed_stage, max_block_size, num_streams);
+}
+
+void StorageReplicatedMergeTree::readLocalSequentialConsistencyImpl(
+    QueryPlan & query_plan,
+    const Names & column_names,
+    const StorageSnapshotPtr & storage_snapshot,
+    SelectQueryInfo & query_info,
+    ContextPtr local_context,
+    QueryProcessingStage::Enum processed_stage,
+    size_t max_block_size,
+    size_t num_streams)
+{
+    auto max_added_blocks = std::make_shared<ReplicatedMergeTreeQuorumAddedParts::PartitionIdToMaxBlock>(getMaxAddedBlocks());
+    auto plan = reader.read(column_names, storage_snapshot, query_info, local_context,
+            max_block_size, num_streams, processed_stage, std::move(max_added_blocks),
+            /* enable_parallel_reading= */false);
+    if (plan)
+        query_plan = std::move(*plan);
+}
+
+void StorageReplicatedMergeTree::readParallelReplicasImpl(
+    QueryPlan & query_plan,
+    const Names & /*column_names*/,
+    const StorageSnapshotPtr & storage_snapshot,
+    SelectQueryInfo & query_info,
+    ContextPtr local_context,
+    QueryProcessingStage::Enum processed_stage,
+    const size_t /*max_block_size*/,
+    const size_t /*num_streams*/)
+{
+    auto table_id = getStorageID();
+
+    auto parallel_replicas_cluster = local_context->getCluster(local_context->getSettingsRef().cluster_for_parallel_replicas);
+
+    ASTPtr modified_query_ast;
+    Block header;
+    if (local_context->getSettingsRef().allow_experimental_analyzer)
     {
-        auto table_id = getStorageID();
+        auto modified_query_tree = buildQueryTreeForShard(query_info, query_info.query_tree);
 
-        ASTPtr modified_query_ast;
-
-        Block header;
-
-        if (local_context->getSettingsRef().allow_experimental_analyzer)
-        {
-            auto modified_query_tree = buildQueryTreeForShard(query_info, query_info.query_tree);
-
-            header = InterpreterSelectQueryAnalyzer::getSampleBlock(
-                modified_query_tree, local_context, SelectQueryOptions(processed_stage).analyze());
-            modified_query_ast = queryNodeToSelectQuery(modified_query_tree);
-        }
-        else
-        {
-            modified_query_ast = ClusterProxy::rewriteSelectQuery(local_context, query_info.query,
-                table_id.database_name, table_id.table_name, /*remote_table_function_ptr*/nullptr);
-            header
-                = InterpreterSelectQuery(modified_query_ast, local_context, SelectQueryOptions(processed_stage).analyze()).getSampleBlock();
-        }
-
-        auto cluster = local_context->getCluster(local_context->getSettingsRef().cluster_for_parallel_replicas);
-
-        ClusterProxy::SelectStreamFactory select_stream_factory =
-            ClusterProxy::SelectStreamFactory(
-                header,
-                {},
-                storage_snapshot,
-                processed_stage);
-
-        ClusterProxy::executeQueryWithParallelReplicas(
-            query_plan, getStorageID(), /*remove_table_function_ptr*/ nullptr,
-            select_stream_factory, modified_query_ast,
-            local_context, query_info, cluster);
+        header = InterpreterSelectQueryAnalyzer::getSampleBlock(
+            modified_query_tree, local_context, SelectQueryOptions(processed_stage).analyze());
+        modified_query_ast = queryNodeToSelectQuery(modified_query_tree);
     }
     else
     {
-        if (auto plan = reader.read(
-            column_names, storage_snapshot, query_info,
-            local_context, max_block_size, num_streams,
-            processed_stage, nullptr, /*enable_parallel_reading*/local_context->canUseParallelReplicasOnFollower()))
-            query_plan = std::move(*plan);
+        modified_query_ast = ClusterProxy::rewriteSelectQuery(local_context, query_info.query,
+            table_id.database_name, table_id.table_name, /*remote_table_function_ptr*/nullptr);
+        header
+            = InterpreterSelectQuery(modified_query_ast, local_context, SelectQueryOptions(processed_stage).analyze()).getSampleBlock();
     }
+
+    ClusterProxy::SelectStreamFactory select_stream_factory = ClusterProxy::SelectStreamFactory(
+        header,
+        {},
+        storage_snapshot,
+        processed_stage);
+
+    ClusterProxy::executeQueryWithParallelReplicas(
+        query_plan, getStorageID(),
+        /* table_func_ptr= */ nullptr,
+        select_stream_factory, modified_query_ast,
+        local_context, query_info, parallel_replicas_cluster);
+}
+
+void StorageReplicatedMergeTree::readLocalImpl(
+    QueryPlan & query_plan,
+    const Names & column_names,
+    const StorageSnapshotPtr & storage_snapshot,
+    SelectQueryInfo & query_info,
+    ContextPtr local_context,
+    QueryProcessingStage::Enum processed_stage,
+    const size_t max_block_size,
+    const size_t num_streams)
+{
+    auto plan = reader.read(
+        column_names, storage_snapshot, query_info,
+        local_context, max_block_size, num_streams,
+        processed_stage,
+        /* max_block_numbers_to_read= */ nullptr,
+        /* enable_parallel_reading= */ local_context->canUseParallelReplicasOnFollower());
+    if (plan)
+        query_plan = std::move(*plan);
 }
 
 template <class Func>
diff --git a/src/Storages/StorageReplicatedMergeTree.h b/src/Storages/StorageReplicatedMergeTree.h
index 1a1b3c3b10c..ded940bc1d2 100644
--- a/src/Storages/StorageReplicatedMergeTree.h
+++ b/src/Storages/StorageReplicatedMergeTree.h
@@ -130,7 +130,7 @@ public:
         const Names & column_names,
         const StorageSnapshotPtr & storage_snapshot,
         SelectQueryInfo & query_info,
-        ContextPtr context,
+        ContextPtr local_context,
         QueryProcessingStage::Enum processed_stage,
         size_t max_block_size,
         size_t num_streams) override;
@@ -513,6 +513,36 @@ private:
 
     static std::optional<QueryPipeline> distributedWriteFromClusterStorage(const std::shared_ptr<IStorageCluster> & src_storage_cluster, const ASTInsertQuery & query, ContextPtr context);
 
+    void readLocalImpl(
+        QueryPlan & query_plan,
+        const Names & column_names,
+        const StorageSnapshotPtr & storage_snapshot,
+        SelectQueryInfo & query_info,
+        ContextPtr local_context,
+        QueryProcessingStage::Enum processed_stage,
+        size_t max_block_size,
+        size_t num_streams);
+
+    void readLocalSequentialConsistencyImpl(
+        QueryPlan & query_plan,
+        const Names & column_names,
+        const StorageSnapshotPtr & storage_snapshot,
+        SelectQueryInfo & query_info,
+        ContextPtr local_context,
+        QueryProcessingStage::Enum processed_stage,
+        size_t max_block_size,
+        size_t num_streams);
+
+    void readParallelReplicasImpl(
+        QueryPlan & query_plan,
+        const Names & column_names,
+        const StorageSnapshotPtr & storage_snapshot,
+        SelectQueryInfo & query_info,
+        ContextPtr local_context,
+        QueryProcessingStage::Enum processed_stage,
+        size_t max_block_size,
+        size_t num_streams);
+
     template <class Func>
     void foreachActiveParts(Func && func, bool select_sequential_consistency) const;
 

From ac54be9652414e10a1b79ec4f92439db5155310b Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@clickhouse.com>
Date: Mon, 24 Jul 2023 05:56:18 +0200
Subject: [PATCH 222/242] Fix a test

---
 tests/integration/test_backward_compatibility/test_functions.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/integration/test_backward_compatibility/test_functions.py b/tests/integration/test_backward_compatibility/test_functions.py
index fa24b146fec..c86c3ba0ab2 100644
--- a/tests/integration/test_backward_compatibility/test_functions.py
+++ b/tests/integration/test_backward_compatibility/test_functions.py
@@ -143,6 +143,7 @@ def test_string_functions(start_cluster):
         "position",
         "substring",
         "CAST",
+        "getTypeSerializationStreams",
         # NOTE: no need to ignore now()/now64() since they will fail because they don't accept any argument
         # 22.8 Backward Incompatible Change: Extended range of Date32
         "toDate32OrZero",

From 2389e0f0b68d03ecbb117745ed00c54979715ea7 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Mon, 17 Jul 2023 09:54:51 +0200
Subject: [PATCH 223/242] Randomize timezone in tests across non-deterministic
 around 1970 and default

There were some cases when patches to the datetime code led to
flaky tests, because the tests themselves had been run using a regular
timezone (TZ).

But if you run these tests with something "specific" (a timezone that
is not strictly defined around the year 1970), those tests will fail.

So to catch such issues in the PRs themselves, let's randomize
session_timezone as well.

Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com>
---
 docker/test/stateless/run.sh |  3 +++
 tests/clickhouse-test        | 19 +++++++++++++++++++
 2 files changed, 22 insertions(+)

diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh
index fe53925ecc8..3694fb7c2f6 100755
--- a/docker/test/stateless/run.sh
+++ b/docker/test/stateless/run.sh
@@ -4,6 +4,9 @@
 set -e -x -a
 
 # Choose random timezone for this test run.
+#
+# NOTE: clickhouse-test will randomize session_timezone by itself as well
+# (it will choose between default server timezone and something specific).
 TZ="$(rg -v '#' /usr/share/zoneinfo/zone.tab  | awk '{print $3}' | shuf | head -n1)"
 echo "Choosen random timezone $TZ"
 ln -snf "/usr/share/zoneinfo/$TZ" /etc/localtime && echo "$TZ" > /etc/timezone
diff --git a/tests/clickhouse-test b/tests/clickhouse-test
index abd109d00b2..185e3003c95 100755
--- a/tests/clickhouse-test
+++ b/tests/clickhouse-test
@@ -529,6 +529,12 @@ def threshold_generator(always_on_prob, always_off_prob, min_val, max_val):
     return gen
 
 
+# To keep dependency list as short as possible, tzdata is not used here (to
+# avoid try/except block for import)
+def get_localzone():
+    return os.getenv("TZ", "/".join(os.readlink("/etc/localtime").split("/")[-2:]))
+
+
 class SettingsRandomizer:
     settings = {
         "max_insert_threads": lambda: 0
@@ -602,6 +608,19 @@ class SettingsRandomizer:
         "enable_memory_bound_merging_of_aggregation_results": lambda: random.randint(
             0, 1
         ),
+        "session_timezone": lambda: random.choice(
+            [
+                # special non-deterministic around 1970 timezone, see [1].
+                #
+                #   [1]: https://github.com/ClickHouse/ClickHouse/issues/42653
+                "America/Mazatlan",
+                "America/Hermosillo",
+                "Mexico/BajaSur",
+                # server default that is randomized across all timezones
+                # NOTE: due to lots of trickery we cannot use empty timezone here, but this should be the same.
+                get_localzone(),
+            ]
+        ),
     }
 
     @staticmethod

From bc167dfde81c44bb93ee7dd0c634ff3428ea3c33 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Tue, 18 Jul 2023 06:20:05 +0200
Subject: [PATCH 224/242] clickhouse-test: add proper escaping for HTTP
 parameters

The problem is that old versions of cURL (7.81.0 at least) handle
additional parameters incorrectly if a previous parameter contains "/":

    $ docker run --rm curlimages/curl:8.1.2 --http1.1 --get -vvv 'http://kernel.org/?bar=foo/baz' --data-urlencode "query=select 1 format Null"; echo
    > GET /?bar=foo/baz&query=select+1+format+Null HTTP/1.1
    > User-Agent: curl/8.1.2

    $ docker run --rm curlimages/curl:7.81.0 --http1.1 --get -vvv 'http://kernel.org/?bar=foo/baz' --data-urlencode "query=select 1 format Null"; echo
    > GET /?bar=foo/baz?query=select+1+format+Null HTTP/1.1
    > User-Agent: curl/7.81.0-DEV

Note that I thought about doing the same for the CLI, but it is not that
easy: even after getting rid of sh -c and string concatenation, it
still cannot be done for CLICKHOUSE_CLIENT_OPT.

Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com>
---
 tests/clickhouse-test | 24 ++++++++++++++++--------
 1 file changed, 16 insertions(+), 8 deletions(-)

diff --git a/tests/clickhouse-test b/tests/clickhouse-test
index 185e3003c95..c63e1e3ae52 100755
--- a/tests/clickhouse-test
+++ b/tests/clickhouse-test
@@ -625,16 +625,16 @@ class SettingsRandomizer:
 
     @staticmethod
     def get_random_settings(args):
-        random_settings = []
+        random_settings = {}
         is_debug = BuildFlags.DEBUG in args.build_flags
         for setting, generator in SettingsRandomizer.settings.items():
             if (
                 is_debug
                 and setting == "allow_prefetched_read_pool_for_remote_filesystem"
             ):
-                random_settings.append(f"{setting}=0")
+                random_settings[setting] = 0
             else:
-                random_settings.append(f"{setting}={generator()}")
+                random_settings[setting] = generator()
         return random_settings
 
 
@@ -670,10 +670,10 @@ class MergeTreeSettingsRandomizer:
 
     @staticmethod
     def get_random_settings(args):
-        random_settings = []
+        random_settings = {}
         for setting, generator in MergeTreeSettingsRandomizer.settings.items():
             if setting not in args.changed_merge_tree_settings:
-                random_settings.append(f"{setting}={generator()}")
+                random_settings[setting] = generator()
         return random_settings
 
 
@@ -785,7 +785,14 @@ class TestCase:
 
     @staticmethod
     def cli_format_settings(settings_list) -> str:
-        return " ".join([f"--{setting}" for setting in settings_list])
+        out = []
+        for k, v in settings_list.items():
+            out.extend([f"--{k}", str(v)])
+        return " ".join(out)
+
+    @staticmethod
+    def http_format_settings(settings_list) -> str:
+        return urllib.parse.urlencode(settings_list)
 
     def has_show_create_table_in_test(self):
         return not subprocess.call(["grep", "-iq", "show create", self.case_file])
@@ -793,11 +800,12 @@ class TestCase:
     def add_random_settings(self, client_options):
         new_options = ""
         if self.randomize_settings:
+            http_params = self.http_format_settings(self.random_settings)
             if len(self.base_url_params) == 0:
-                os.environ["CLICKHOUSE_URL_PARAMS"] = "&".join(self.random_settings)
+                os.environ["CLICKHOUSE_URL_PARAMS"] = http_params
             else:
                 os.environ["CLICKHOUSE_URL_PARAMS"] = (
-                    self.base_url_params + "&" + "&".join(self.random_settings)
+                    self.base_url_params + "&" + http_params
                 )
 
             new_options += f" {self.cli_format_settings(self.random_settings)}"

From 6ae4d291800c7d9b32622f1d520f1ab27b9f90b7 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Wed, 19 Jul 2023 13:22:31 +0200
Subject: [PATCH 225/242] Fix tests after session_timezone randomization

Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com>
---
 .../0_stateless/00387_use_client_time_zone.sh        |  3 ++-
 tests/queries/0_stateless/00427_alter_primary_key.sh | 11 ++++++-----
 tests/queries/0_stateless/00933_ttl_simple.sql       | 12 ++++++++++++
 ...42_system_reload_dictionary_reloads_completely.sh |  4 ++--
 .../0_stateless/01070_modify_ttl_recalc_only.sql     |  3 +++
 .../0_stateless/02530_dictionaries_update_field.sh   |  3 ++-
 6 files changed, 27 insertions(+), 9 deletions(-)

diff --git a/tests/queries/0_stateless/00387_use_client_time_zone.sh b/tests/queries/0_stateless/00387_use_client_time_zone.sh
index 2a6d81eebfe..e54d5244eef 100755
--- a/tests/queries/0_stateless/00387_use_client_time_zone.sh
+++ b/tests/queries/0_stateless/00387_use_client_time_zone.sh
@@ -5,4 +5,5 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 # shellcheck source=../shell_config.sh
 . "$CURDIR"/../shell_config.sh
 
-env TZ=UTC ${CLICKHOUSE_CLIENT} --use_client_time_zone=1 --query="SELECT toDateTime(1000000000)"
+# NOTE: session_timezone overrides use_client_time_zone, so disable its randomization
+env TZ=UTC ${CLICKHOUSE_CLIENT} --session_timezone '' --use_client_time_zone=1 --query="SELECT toDateTime(1000000000)"
diff --git a/tests/queries/0_stateless/00427_alter_primary_key.sh b/tests/queries/0_stateless/00427_alter_primary_key.sh
index 1269e2ad6e3..f9984384d79 100755
--- a/tests/queries/0_stateless/00427_alter_primary_key.sh
+++ b/tests/queries/0_stateless/00427_alter_primary_key.sh
@@ -7,11 +7,12 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 function perform()
 {
     local query=$1
-    TZ=UTC $CLICKHOUSE_CLIENT \
-         --allow_deprecated_syntax_for_merge_tree=1 \
-        --use_client_time_zone=1 \
-        --input_format_values_interpret_expressions=0 \
-        --query "$query" 2>/dev/null
+    local settings=(
+        --allow_deprecated_syntax_for_merge_tree 1
+        --session_timezone UTC
+        --input_format_values_interpret_expressions 0
+    )
+    TZ=UTC $CLICKHOUSE_CLIENT "${settings[@]}" --query "$query" 2>/dev/null
     if [ "$?" -ne 0 ]; then
         echo "query failed"
     fi
diff --git a/tests/queries/0_stateless/00933_ttl_simple.sql b/tests/queries/0_stateless/00933_ttl_simple.sql
index 2bf686822d5..ad40e7c7e47 100644
--- a/tests/queries/0_stateless/00933_ttl_simple.sql
+++ b/tests/queries/0_stateless/00933_ttl_simple.sql
@@ -1,3 +1,15 @@
+-- disable timezone randomization since otherwise TTL may fail at particular datetime, i.e.:
+--
+--     SELECT
+--         now(),
+--         toDate(toTimeZone(now(), 'America/Mazatlan')),
+--         today()
+--
+--     ┌───────────────now()─┬─toDate(toTimeZone(now(), 'America/Mazatlan'))─┬────today()─┐
+--     │ 2023-07-24 06:24:06 │                                    2023-07-23 │ 2023-07-24 │
+--     └─────────────────────┴───────────────────────────────────────────────┴────────────┘
+set session_timezone = '';
+
 drop table if exists ttl_00933_1;
 
 -- Column TTL works only with wide parts, because it's very expensive to apply it for compact parts
diff --git a/tests/queries/0_stateless/01042_system_reload_dictionary_reloads_completely.sh b/tests/queries/0_stateless/01042_system_reload_dictionary_reloads_completely.sh
index f2b30e05040..9d34470c38d 100755
--- a/tests/queries/0_stateless/01042_system_reload_dictionary_reloads_completely.sh
+++ b/tests/queries/0_stateless/01042_system_reload_dictionary_reloads_completely.sh
@@ -7,8 +7,8 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 
 set -e -o pipefail
 
-# Run the client.
-$CLICKHOUSE_CLIENT --multiquery <<'EOF'
+# NOTE: dictionary TTLs work with the server timezone, so session_timezone cannot be used
+$CLICKHOUSE_CLIENT --session_timezone '' --multiquery <<'EOF'
 DROP DATABASE IF EXISTS dictdb_01042;
 CREATE DATABASE dictdb_01042;
 CREATE TABLE dictdb_01042.table(x Int64, y Int64, insert_time DateTime) ENGINE = MergeTree ORDER BY tuple();
diff --git a/tests/queries/0_stateless/01070_modify_ttl_recalc_only.sql b/tests/queries/0_stateless/01070_modify_ttl_recalc_only.sql
index 247e412484f..7ac70d41871 100644
--- a/tests/queries/0_stateless/01070_modify_ttl_recalc_only.sql
+++ b/tests/queries/0_stateless/01070_modify_ttl_recalc_only.sql
@@ -2,6 +2,9 @@
 
 set mutations_sync = 2;
 
+-- system.parts uses the server default timezone, so session_timezone cannot be randomized
+set session_timezone = '';
+
 drop table if exists ttl;
 
 create table ttl (d Date, a Int) engine = MergeTree order by a partition by toDayOfMonth(d)
diff --git a/tests/queries/0_stateless/02530_dictionaries_update_field.sh b/tests/queries/0_stateless/02530_dictionaries_update_field.sh
index 569466fe606..6ac10ea2308 100755
--- a/tests/queries/0_stateless/02530_dictionaries_update_field.sh
+++ b/tests/queries/0_stateless/02530_dictionaries_update_field.sh
@@ -5,7 +5,8 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 # shellcheck source=../shell_config.sh
 . "$CUR_DIR"/../shell_config.sh
 
-$CLICKHOUSE_CLIENT -q "
+# NOTE: dictionaries will be updated according to server TZ, not session, so prohibit its randomization
+$CLICKHOUSE_CLIENT --session_timezone '' -q "
     CREATE TABLE table_for_update_field_dictionary
     (
         key UInt64,

From ceaaa78fdcfac2243bcf28624336217bd44898f0 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@clickhouse.com>
Date: Mon, 24 Jul 2023 08:04:38 +0200
Subject: [PATCH 226/242] Fix transform

---
 src/Functions/transform.cpp | 37 ++++++++++++++++++-------------------
 1 file changed, 18 insertions(+), 19 deletions(-)

diff --git a/src/Functions/transform.cpp b/src/Functions/transform.cpp
index 1fc0e3adf96..a48d8d47489 100644
--- a/src/Functions/transform.cpp
+++ b/src/Functions/transform.cpp
@@ -156,15 +156,15 @@ namespace
         {
             initialize(arguments, result_type);
 
-            const auto * in = arguments.front().column.get();
-
-            if (isColumnConst(*in))
+            if (isColumnConst(*arguments[0].column))
                 return executeConst(arguments, result_type, input_rows_count);
 
             ColumnPtr default_non_const;
             if (!cache.default_column && arguments.size() == 4)
                 default_non_const = castColumn(arguments[3], result_type);
 
+            ColumnPtr in = cache.default_column ? arguments[0].column : castColumn(arguments[0], result_type);
+
             auto column_result = result_type->createColumn();
             if (cache.is_empty)
             {
@@ -174,30 +174,30 @@ namespace
             }
             else if (cache.table_num_to_idx)
             {
-                if (!executeNum<ColumnVector<UInt8>>(in, *column_result, default_non_const)
-                    && !executeNum<ColumnVector<UInt16>>(in, *column_result, default_non_const)
-                    && !executeNum<ColumnVector<UInt32>>(in, *column_result, default_non_const)
-                    && !executeNum<ColumnVector<UInt64>>(in, *column_result, default_non_const)
-                    && !executeNum<ColumnVector<Int8>>(in, *column_result, default_non_const)
-                    && !executeNum<ColumnVector<Int16>>(in, *column_result, default_non_const)
-                    && !executeNum<ColumnVector<Int32>>(in, *column_result, default_non_const)
-                    && !executeNum<ColumnVector<Int64>>(in, *column_result, default_non_const)
-                    && !executeNum<ColumnVector<Float32>>(in, *column_result, default_non_const)
-                    && !executeNum<ColumnVector<Float64>>(in, *column_result, default_non_const)
-                    && !executeNum<ColumnDecimal<Decimal32>>(in, *column_result, default_non_const)
-                    && !executeNum<ColumnDecimal<Decimal64>>(in, *column_result, default_non_const))
+                if (!executeNum<ColumnVector<UInt8>>(in.get(), *column_result, default_non_const)
+                    && !executeNum<ColumnVector<UInt16>>(in.get(), *column_result, default_non_const)
+                    && !executeNum<ColumnVector<UInt32>>(in.get(), *column_result, default_non_const)
+                    && !executeNum<ColumnVector<UInt64>>(in.get(), *column_result, default_non_const)
+                    && !executeNum<ColumnVector<Int8>>(in.get(), *column_result, default_non_const)
+                    && !executeNum<ColumnVector<Int16>>(in.get(), *column_result, default_non_const)
+                    && !executeNum<ColumnVector<Int32>>(in.get(), *column_result, default_non_const)
+                    && !executeNum<ColumnVector<Int64>>(in.get(), *column_result, default_non_const)
+                    && !executeNum<ColumnVector<Float32>>(in.get(), *column_result, default_non_const)
+                    && !executeNum<ColumnVector<Float64>>(in.get(), *column_result, default_non_const)
+                    && !executeNum<ColumnDecimal<Decimal32>>(in.get(), *column_result, default_non_const)
+                    && !executeNum<ColumnDecimal<Decimal64>>(in.get(), *column_result, default_non_const))
                 {
                     throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}", in->getName(), getName());
                 }
             }
             else if (cache.table_string_to_idx)
             {
-                if (!executeString(in, *column_result, default_non_const))
-                    executeContiguous(in, *column_result, default_non_const);
+                if (!executeString(in.get(), *column_result, default_non_const))
+                    executeContiguous(in.get(), *column_result, default_non_const);
             }
             else if (cache.table_anything_to_idx)
             {
-                executeAnything(in, *column_result, default_non_const);
+                executeAnything(in.get(), *column_result, default_non_const);
             }
             else
                 throw Exception(ErrorCodes::LOGICAL_ERROR, "State of the function `transform` is not initialized");
@@ -810,7 +810,6 @@ namespace
             cache.initialized = true;
         }
     };
-
 }
 
 REGISTER_FUNCTION(Transform)

From aaa0bf64fd888332bfa59c284508d4e7a84d372c Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@clickhouse.com>
Date: Mon, 24 Jul 2023 08:05:55 +0200
Subject: [PATCH 227/242] Add a test

---
 .../02832_transform_fixed_string_no_default.reference            | 1 +
 .../0_stateless/02832_transform_fixed_string_no_default.sql      | 1 +
 2 files changed, 2 insertions(+)
 create mode 100644 tests/queries/0_stateless/02832_transform_fixed_string_no_default.reference
 create mode 100644 tests/queries/0_stateless/02832_transform_fixed_string_no_default.sql

diff --git a/tests/queries/0_stateless/02832_transform_fixed_string_no_default.reference b/tests/queries/0_stateless/02832_transform_fixed_string_no_default.reference
new file mode 100644
index 00000000000..9daeafb9864
--- /dev/null
+++ b/tests/queries/0_stateless/02832_transform_fixed_string_no_default.reference
@@ -0,0 +1 @@
+test
diff --git a/tests/queries/0_stateless/02832_transform_fixed_string_no_default.sql b/tests/queries/0_stateless/02832_transform_fixed_string_no_default.sql
new file mode 100644
index 00000000000..8d316d3413f
--- /dev/null
+++ b/tests/queries/0_stateless/02832_transform_fixed_string_no_default.sql
@@ -0,0 +1 @@
+SELECT transform(name, ['a', 'b'], ['', NULL]) AS name FROM (SELECT 'test'::Nullable(FixedString(4)) AS name);

From 890a3754a6a093545122e42bcab066a27c72ed5e Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@clickhouse.com>
Date: Mon, 24 Jul 2023 08:19:46 +0200
Subject: [PATCH 228/242] Fix error

---
 src/Functions/transform.cpp | 55 ++++++++++++++++++++-----------------
 1 file changed, 30 insertions(+), 25 deletions(-)

diff --git a/src/Functions/transform.cpp b/src/Functions/transform.cpp
index a48d8d47489..79168d82c54 100644
--- a/src/Functions/transform.cpp
+++ b/src/Functions/transform.cpp
@@ -156,14 +156,18 @@ namespace
         {
             initialize(arguments, result_type);
 
-            if (isColumnConst(*arguments[0].column))
+            const auto * in = arguments[0].column.get();
+
+            if (isColumnConst(*in))
                 return executeConst(arguments, result_type, input_rows_count);
 
             ColumnPtr default_non_const;
             if (!cache.default_column && arguments.size() == 4)
                 default_non_const = castColumn(arguments[3], result_type);
 
-            ColumnPtr in = cache.default_column ? arguments[0].column : castColumn(arguments[0], result_type);
+            ColumnPtr in_casted = arguments[0].column;
+            if (arguments.size() == 3)
+                in_casted = castColumn(arguments[0], result_type);
 
             auto column_result = result_type->createColumn();
             if (cache.is_empty)
@@ -174,30 +178,30 @@ namespace
             }
             else if (cache.table_num_to_idx)
             {
-                if (!executeNum<ColumnVector<UInt8>>(in.get(), *column_result, default_non_const)
-                    && !executeNum<ColumnVector<UInt16>>(in.get(), *column_result, default_non_const)
-                    && !executeNum<ColumnVector<UInt32>>(in.get(), *column_result, default_non_const)
-                    && !executeNum<ColumnVector<UInt64>>(in.get(), *column_result, default_non_const)
-                    && !executeNum<ColumnVector<Int8>>(in.get(), *column_result, default_non_const)
-                    && !executeNum<ColumnVector<Int16>>(in.get(), *column_result, default_non_const)
-                    && !executeNum<ColumnVector<Int32>>(in.get(), *column_result, default_non_const)
-                    && !executeNum<ColumnVector<Int64>>(in.get(), *column_result, default_non_const)
-                    && !executeNum<ColumnVector<Float32>>(in.get(), *column_result, default_non_const)
-                    && !executeNum<ColumnVector<Float64>>(in.get(), *column_result, default_non_const)
-                    && !executeNum<ColumnDecimal<Decimal32>>(in.get(), *column_result, default_non_const)
-                    && !executeNum<ColumnDecimal<Decimal64>>(in.get(), *column_result, default_non_const))
+                if (!executeNum<ColumnVector<UInt8>>(in, *column_result, default_non_const, *in_casted)
+                    && !executeNum<ColumnVector<UInt16>>(in, *column_result, default_non_const, *in_casted)
+                    && !executeNum<ColumnVector<UInt32>>(in, *column_result, default_non_const, *in_casted)
+                    && !executeNum<ColumnVector<UInt64>>(in, *column_result, default_non_const, *in_casted)
+                    && !executeNum<ColumnVector<Int8>>(in, *column_result, default_non_const, *in_casted)
+                    && !executeNum<ColumnVector<Int16>>(in, *column_result, default_non_const, *in_casted)
+                    && !executeNum<ColumnVector<Int32>>(in, *column_result, default_non_const, *in_casted)
+                    && !executeNum<ColumnVector<Int64>>(in, *column_result, default_non_const, *in_casted)
+                    && !executeNum<ColumnVector<Float32>>(in, *column_result, default_non_const, *in_casted)
+                    && !executeNum<ColumnVector<Float64>>(in, *column_result, default_non_const, *in_casted)
+                    && !executeNum<ColumnDecimal<Decimal32>>(in, *column_result, default_non_const, *in_casted)
+                    && !executeNum<ColumnDecimal<Decimal64>>(in, *column_result, default_non_const, *in_casted))
                 {
                     throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}", in->getName(), getName());
                 }
             }
             else if (cache.table_string_to_idx)
             {
-                if (!executeString(in.get(), *column_result, default_non_const))
-                    executeContiguous(in.get(), *column_result, default_non_const);
+                if (!executeString(in, *column_result, default_non_const, *in_casted))
+                    executeContiguous(in, *column_result, default_non_const, *in_casted);
             }
             else if (cache.table_anything_to_idx)
             {
-                executeAnything(in.get(), *column_result, default_non_const);
+                executeAnything(in, *column_result, default_non_const, *in_casted);
             }
             else
                 throw Exception(ErrorCodes::LOGICAL_ERROR, "State of the function `transform` is not initialized");
@@ -218,7 +222,7 @@ namespace
             return impl->execute(args, result_type, input_rows_count);
         }
 
-        void executeAnything(const IColumn * in, IColumn & column_result, const ColumnPtr default_non_const) const
+        void executeAnything(const IColumn * in, IColumn & column_result, const ColumnPtr default_non_const, const IColumn & in_casted) const
         {
             const size_t size = in->size();
             const auto & table = *cache.table_anything_to_idx;
@@ -236,11 +240,11 @@ namespace
                 else if (default_non_const)
                     column_result.insertFrom(*default_non_const, i);
                 else
-                    column_result.insertFrom(*in, i);
+                    column_result.insertFrom(in_casted, i);
             }
         }
 
-        void executeContiguous(const IColumn * in, IColumn & column_result, const ColumnPtr default_non_const) const
+        void executeContiguous(const IColumn * in, IColumn & column_result, const ColumnPtr default_non_const, const IColumn & in_casted) const
         {
             const size_t size = in->size();
             const auto & table = *cache.table_string_to_idx;
@@ -255,12 +259,12 @@ namespace
                 else if (default_non_const)
                     column_result.insertFrom(*default_non_const, i);
                 else
-                    column_result.insertFrom(*in, i);
+                    column_result.insertFrom(in_casted, i);
             }
         }
 
         template <typename T>
-        bool executeNum(const IColumn * in_untyped, IColumn & column_result, const ColumnPtr default_non_const) const
+        bool executeNum(const IColumn * in_untyped, IColumn & column_result, const ColumnPtr default_non_const, const IColumn & in_casted) const
         {
             const auto * const in = checkAndGetColumn<T>(in_untyped);
             if (!in)
@@ -297,7 +301,7 @@ namespace
                     else if (default_non_const)
                         column_result.insertFrom(*default_non_const, i);
                     else
-                        column_result.insertFrom(*in, i);
+                        column_result.insertFrom(in_casted, i);
                 }
             }
             return true;
@@ -451,7 +455,7 @@ namespace
             }
         }
 
-        bool executeString(const IColumn * in_untyped, IColumn & column_result, const ColumnPtr default_non_const) const
+        bool executeString(const IColumn * in_untyped, IColumn & column_result, const ColumnPtr default_non_const, const IColumn & in_casted) const
         {
             const auto * const in = checkAndGetColumn<ColumnString>(in_untyped);
             if (!in)
@@ -488,7 +492,7 @@ namespace
                     else if (default_non_const)
                         column_result.insertFrom(*default_non_const, 0);
                     else
-                        column_result.insertFrom(*in, i);
+                        column_result.insertFrom(in_casted, i);
                 }
             }
             return true;
@@ -810,6 +814,7 @@ namespace
             cache.initialized = true;
         }
     };
+
 }
 
 REGISTER_FUNCTION(Transform)

From c79492240194f0d5dd9053c70a967c39a7536cb3 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@clickhouse.com>
Date: Mon, 24 Jul 2023 08:20:30 +0200
Subject: [PATCH 229/242] More tests

---
 .../02832_transform_fixed_string_no_default.reference           | 2 ++
 .../0_stateless/02832_transform_fixed_string_no_default.sql     | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/tests/queries/0_stateless/02832_transform_fixed_string_no_default.reference b/tests/queries/0_stateless/02832_transform_fixed_string_no_default.reference
index 9daeafb9864..ea545c90391 100644
--- a/tests/queries/0_stateless/02832_transform_fixed_string_no_default.reference
+++ b/tests/queries/0_stateless/02832_transform_fixed_string_no_default.reference
@@ -1 +1,3 @@
 test
+
+\N
diff --git a/tests/queries/0_stateless/02832_transform_fixed_string_no_default.sql b/tests/queries/0_stateless/02832_transform_fixed_string_no_default.sql
index 8d316d3413f..0e58c716c9f 100644
--- a/tests/queries/0_stateless/02832_transform_fixed_string_no_default.sql
+++ b/tests/queries/0_stateless/02832_transform_fixed_string_no_default.sql
@@ -1 +1,3 @@
 SELECT transform(name, ['a', 'b'], ['', NULL]) AS name FROM (SELECT 'test'::Nullable(FixedString(4)) AS name);
+SELECT transform(name, ['test', 'b'], ['', NULL]) AS name FROM (SELECT 'test'::Nullable(FixedString(4)) AS name);
+SELECT transform(name, ['a', 'test'], ['', NULL]) AS name FROM (SELECT 'test'::Nullable(FixedString(4)) AS name);

From 0e46cf86b772e1513d837d6019181a6d291b7219 Mon Sep 17 00:00:00 2001
From: Smita Kulkarni <Smita.Kulkarni@clickhouse.com>
Date: Mon, 24 Jul 2023 08:52:19 +0200
Subject: [PATCH 230/242] Added try-except to check cases when second
 backup/restore is picked up first

---
 .../test_disallow_concurrency.py              | 69 +++++++++++++++----
 1 file changed, 57 insertions(+), 12 deletions(-)

diff --git a/tests/integration/test_backup_restore_on_cluster/test_disallow_concurrency.py b/tests/integration/test_backup_restore_on_cluster/test_disallow_concurrency.py
index d0ce2e03016..a863a6e2047 100644
--- a/tests/integration/test_backup_restore_on_cluster/test_disallow_concurrency.py
+++ b/tests/integration/test_backup_restore_on_cluster/test_disallow_concurrency.py
@@ -133,9 +133,21 @@ def test_concurrent_backups_on_same_node():
     )
     assert status in ["CREATING_BACKUP", "BACKUP_CREATED"]
 
-    error = nodes[0].query_and_get_error(
-        f"BACKUP TABLE tbl ON CLUSTER 'cluster' TO {backup_name}"
-    )
+    try:
+        error = nodes[0].query_and_get_error(
+            f"BACKUP TABLE tbl ON CLUSTER 'cluster' TO {backup_name}"
+        )
+    except Exception as e:
+        status = (
+            nodes[0]
+            .query(f"SELECT status FROM system.backups WHERE id == '{id}'")
+            .rstrip("\n")
+        )
+        # It is possible that the second backup was picked up first; then the async backup is either still being created or has failed
+        if status == "CREATING_BACKUP" or status == "BACKUP_FAILED":
+            return
+        else:
+            raise e
     expected_errors = [
         "Concurrent backups not supported",
         f"Backup {backup_name} already exists",
@@ -179,9 +191,20 @@ def test_concurrent_backups_on_different_nodes():
     )
     assert status in ["CREATING_BACKUP", "BACKUP_CREATED"]
 
-    error = nodes[0].query_and_get_error(
-        f"BACKUP TABLE tbl ON CLUSTER 'cluster' TO {backup_name}"
-    )
+    try:
+        error = nodes[0].query_and_get_error(
+            f"BACKUP TABLE tbl ON CLUSTER 'cluster' TO {backup_name}"
+        )
+    except Exception as e:
+        status = (
+            nodes[1]
+            .query(f"SELECT status FROM system.backups WHERE id == '{id}'")
+            .rstrip("\n")
+        )
+        if status == "CREATING_BACKUP" or status == "BACKUP_FAILED":
+            return
+        else:
+            raise e
     expected_errors = [
         "Concurrent backups not supported",
         f"Backup {backup_name} already exists",
@@ -224,9 +247,20 @@ def test_concurrent_restores_on_same_node():
     )
     assert status in ["RESTORING", "RESTORED"]
 
-    error = nodes[0].query_and_get_error(
-        f"RESTORE TABLE tbl ON CLUSTER 'cluster' FROM {backup_name}"
-    )
+    try:
+        error = nodes[0].query_and_get_error(
+            f"RESTORE TABLE tbl ON CLUSTER 'cluster' FROM {backup_name}"
+        )
+    except Exception as e:
+        status = (
+            nodes[0]
+            .query(f"SELECT status FROM system.backups WHERE id == '{id}'")
+            .rstrip("\n")
+        )
+        if status == "RESTORING" or status == "RESTORE_FAILED":
+            return
+        else:
+            raise e
     expected_errors = [
         "Concurrent restores not supported",
         "Cannot restore the table default.tbl because it already contains some data",
@@ -269,9 +303,20 @@ def test_concurrent_restores_on_different_node():
     )
     assert status in ["RESTORING", "RESTORED"]
 
-    error = nodes[1].query_and_get_error(
-        f"RESTORE TABLE tbl ON CLUSTER 'cluster' FROM {backup_name}"
-    )
+    try:
+        error = nodes[1].query_and_get_error(
+            f"RESTORE TABLE tbl ON CLUSTER 'cluster' FROM {backup_name}"
+        )
+    except Exception as e:
+        status = (
+            nodes[0]
+            .query(f"SELECT status FROM system.backups WHERE id == '{id}'")
+            .rstrip("\n")
+        )
+        if status == "RESTORING" or status == "RESTORE_FAILED":
+            return
+        else:
+            raise e
     expected_errors = [
         "Concurrent restores not supported",
         "Cannot restore the table default.tbl because it already contains some data",

From 0401dc453e9502697328879728bf0dbf7c1dd9e0 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Mon, 24 Jul 2023 10:14:23 +0200
Subject: [PATCH 231/242] Fix flakiness of test_version_update_after_mutation
 by enabling force_remove_data_recursively_on_drop

Since there can be some leftovers:

    2023.07.24 07:08:25.238066 [ 140 ] {} <Error> Application: Code: 219. DB::Exception: Cannot drop: filesystem error: in remove: Directory not empty ["/var/lib/clickhouse/data/system/"]. Probably database contain some detached tables or metadata leftovers from Ordinary engine. If you want to remove all data anyway, try to attach database back and drop it again with enabled force_remove_data_recursively_on_drop setting: Exception while trying to convert database system from Ordinary to Atomic. It may be in some intermediate state. You can finish conversion manually by moving the rest tables from system to .tmp_convert.system.9396432095832455195 (using RENAME TABLE) and executing DROP DATABASE system and RENAME DATABASE .tmp_convert.system.9396432095832455195 TO system. (DATABASE_NOT_EMPTY), Stack trace (when copying this message, always include the lines below):

    0. DB::Exception::Exception(DB::Exception::MessageMasked&&, int, bool) @ 0x000000000e68af57 in /usr/bin/clickhouse
    1. ? @ 0x000000000cab443c in /usr/bin/clickhouse
    2. DB::DatabaseOnDisk::drop(std::shared_ptr<DB::Context const>) @ 0x000000001328d617 in /usr/bin/clickhouse
    3. DB::DatabaseCatalog::detachDatabase(std::shared_ptr<DB::Context const>, String const&, bool, bool) @ 0x0000000013524a6c in /usr/bin/clickhouse
    4. DB::InterpreterDropQuery::executeToDatabaseImpl(DB::ASTDropQuery const&, std::shared_ptr<DB::IDatabase>&, std::vector<StrongTypedef<wide::integer<128ul, unsigned int>, DB::UUIDTag>, std::allocator<StrongTypedef<wide::integer<128ul, unsigned int>, DB::UUIDTag>>>&) @ 0x0000000013bc05e4 in /usr/bin/clickhouse
    5. DB::InterpreterDropQuery::executeToDatabase(DB::ASTDropQuery const&) @ 0x0000000013bbc6b8 in /usr/bin/clickhouse
    6. DB::InterpreterDropQuery::execute() @ 0x0000000013bbba22 in /usr/bin/clickhouse
    7. ? @ 0x00000000140b13a5 in /usr/bin/clickhouse
    8. DB::executeQuery(String const&, std::shared_ptr<DB::Context>, bool, DB::QueryProcessingStage::Enum) @ 0x00000000140ad20e in /usr/bin/clickhouse
    9. ? @ 0x00000000140d2ef0 in /usr/bin/clickhouse
    10. DB::maybeConvertSystemDatabase(std::shared_ptr<DB::Context>) @ 0x00000000140d0aaf in /usr/bin/clickhouse
    11. DB::Server::main(std::vector<String, std::allocator<String>> const&) @ 0x000000000e724e55 in /usr/bin/clickhouse
    12. Poco::Util::Application::run() @ 0x0000000017ead086 in /usr/bin/clickhouse
    13. DB::Server::run() @ 0x000000000e714a5d in /usr/bin/clickhouse
    14. Poco::Util::ServerApplication::run(int, char**) @ 0x0000000017ec07b9 in /usr/bin/clickhouse
    15. mainEntryClickHouseServer(int, char**) @ 0x000000000e711a26 in /usr/bin/clickhouse
    16. main @ 0x0000000008cf13cf in /usr/bin/clickhouse
    17. __libc_start_main @ 0x0000000000021b97 in /lib/x86_64-linux-gnu/libc-2.27.so
    18. _start @ 0x00000000080705ae in /usr/bin/clickhouse
     (version 23.7.1.2012)

Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com>
---
 tests/integration/helpers/cluster.py                |  9 +++++++++
 .../force_remove_data_recursively_on_drop.xml       |  7 +++++++
 .../test_version_update_after_mutation/test.py      | 13 ++++++++++---
 3 files changed, 26 insertions(+), 3 deletions(-)
 create mode 100644 tests/integration/test_version_update_after_mutation/configs/force_remove_data_recursively_on_drop.xml

diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py
index eff44de842a..0448eb2437f 100644
--- a/tests/integration/helpers/cluster.py
+++ b/tests/integration/helpers/cluster.py
@@ -3199,6 +3199,7 @@ class ClickHouseInstance:
     ):
         self.name = name
         self.base_cmd = cluster.base_cmd
+        self.base_dir = base_path
         self.docker_id = cluster.get_instance_docker_id(self.name)
         self.cluster = cluster
         self.hostname = hostname if hostname is not None else self.name
@@ -4193,6 +4194,14 @@ class ClickHouseInstance:
             ["bash", "-c", f"sed -i 's/{replace}/{replacement}/g' {path_to_config}"]
         )
 
+    def put_users_config(self, config_path):
+        """Put new config (useful if you cannot put it at the start)"""
+
+        instance_config_dir = p.abspath(p.join(self.path, "configs"))
+        users_d_dir = p.abspath(p.join(instance_config_dir, "users.d"))
+        config_path = p.join(self.base_dir, config_path)
+        shutil.copy(config_path, users_d_dir)
+
     def create_dir(self):
         """Create the instance directory and all the needed files there."""
 
diff --git a/tests/integration/test_version_update_after_mutation/configs/force_remove_data_recursively_on_drop.xml b/tests/integration/test_version_update_after_mutation/configs/force_remove_data_recursively_on_drop.xml
new file mode 100644
index 00000000000..7a00648b28e
--- /dev/null
+++ b/tests/integration/test_version_update_after_mutation/configs/force_remove_data_recursively_on_drop.xml
@@ -0,0 +1,7 @@
+<clickhouse>
+    <profiles>
+        <default>
+            <force_remove_data_recursively_on_drop>1</force_remove_data_recursively_on_drop>
+        </default>
+    </profiles>
+</clickhouse>
diff --git a/tests/integration/test_version_update_after_mutation/test.py b/tests/integration/test_version_update_after_mutation/test.py
index c80205d48c1..416220c93c3 100644
--- a/tests/integration/test_version_update_after_mutation/test.py
+++ b/tests/integration/test_version_update_after_mutation/test.py
@@ -51,6 +51,12 @@ def start_cluster():
         cluster.shutdown()
 
 
+def restart_node(node):
+    # set force_remove_data_recursively_on_drop (cannot be done before, because the version is too old)
+    node.put_users_config("configs/force_remove_data_recursively_on_drop.xml")
+    node.restart_with_latest_version(signal=9, fix_metadata=True)
+
+
 def test_mutate_and_upgrade(start_cluster):
     for node in [node1, node2]:
         node.query("DROP TABLE IF EXISTS mt")
@@ -67,8 +73,9 @@ def test_mutate_and_upgrade(start_cluster):
 
     node2.query("DETACH TABLE mt")  # stop being leader
     node1.query("DETACH TABLE mt")  # stop being leader
-    node1.restart_with_latest_version(signal=9, fix_metadata=True)
-    node2.restart_with_latest_version(signal=9, fix_metadata=True)
+
+    restart_node(node1)
+    restart_node(node2)
 
     # After hard restart table can be in readonly mode
     exec_query_with_retry(
@@ -124,7 +131,7 @@ def test_upgrade_while_mutation(start_cluster):
     # (We could be in process of creating some system table, which will leave empty directory on restart,
     # so when we start moving system tables from ordinary to atomic db, it will complain about some undeleted files)
     node3.query("SYSTEM FLUSH LOGS")
-    node3.restart_with_latest_version(signal=9, fix_metadata=True)
+    restart_node(node3)
 
     # checks for readonly
     exec_query_with_retry(node3, "OPTIMIZE TABLE mt1", sleep_time=5, retry_count=60)

From efa638ef3cc7db3c6149b7c031cc4c7904987abd Mon Sep 17 00:00:00 2001
From: Val Doroshchuk <valbok@gmail.com>
Date: Wed, 19 Jul 2023 12:53:27 +0200
Subject: [PATCH 232/242] MaterializedMySQL: Support unquoted utf-8 strings in
 DDL
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

ClickHouse does not support unquoted UTF-8 strings in DDL, but MySQL does.

Instead of changing the Lexer to recognize UTF-8 characters as TokenType::BareWord,
this patch quotes all unrecognized tokens before applying any DDL.

The actual parsing and syntax validation is still done by the corresponding Parser.

If the query contains any TokenType::Error token, it cannot be parsed anyway.
Quoting such tokens adds support for UTF-8 names.

See `tryQuoteUnrecognizedTokens` and `QuoteUnrecognizedTokensTest`.

mysql> CREATE TABLE 道.渠(...

is converted to

CREATE TABLE `道`.`渠`(...

Also fixed a bug with a missing `*` in the SELECT issued during full sync, which happened because the database or table name was back-quoted when not needed.
---
 src/Common/quoteString.cpp                    |  11 +
 src/Common/quoteString.h                      |   3 +
 .../MySQL/MaterializedMySQLSyncThread.cpp     |   7 +-
 .../gtest_try_quote_unrecognized_tokens.cpp   | 289 ++++++++++++++++++
 .../MySQL/tryQuoteUnrecognizedTokens.cpp      |  96 ++++++
 .../MySQL/tryQuoteUnrecognizedTokens.h        |  10 +
 src/Storages/StorageMySQL.cpp                 |  11 +-
 .../materialized_with_ddl.py                  | 122 ++++++++
 .../test_materialized_mysql_database/test.py  |   6 +
 9 files changed, 542 insertions(+), 13 deletions(-)
 create mode 100644 src/Databases/MySQL/tests/gtest_try_quote_unrecognized_tokens.cpp
 create mode 100644 src/Databases/MySQL/tryQuoteUnrecognizedTokens.cpp
 create mode 100644 src/Databases/MySQL/tryQuoteUnrecognizedTokens.h

diff --git a/src/Common/quoteString.cpp b/src/Common/quoteString.cpp
index b464f4837a1..17129441c8f 100644
--- a/src/Common/quoteString.cpp
+++ b/src/Common/quoteString.cpp
@@ -44,4 +44,15 @@ String backQuoteIfNeed(StringRef x)
     return res;
 }
 
+
+String backQuoteMySQL(StringRef x)
+{
+    String res(x.size, '\0');
+    {
+        WriteBufferFromString wb(res);
+        writeBackQuotedStringMySQL(x, wb);
+    }
+    return res;
+}
+
 }
diff --git a/src/Common/quoteString.h b/src/Common/quoteString.h
index b83988258e2..3f17d6e7621 100644
--- a/src/Common/quoteString.h
+++ b/src/Common/quoteString.h
@@ -24,4 +24,7 @@ String backQuote(StringRef x);
 /// Quote the identifier with backquotes, if required.
 String backQuoteIfNeed(StringRef x);
 
+/// Quote the identifier with backquotes, for use in MySQL queries.
+String backQuoteMySQL(StringRef x);
+
 }
diff --git a/src/Databases/MySQL/MaterializedMySQLSyncThread.cpp b/src/Databases/MySQL/MaterializedMySQLSyncThread.cpp
index 603bf3d0166..673bd155f77 100644
--- a/src/Databases/MySQL/MaterializedMySQLSyncThread.cpp
+++ b/src/Databases/MySQL/MaterializedMySQLSyncThread.cpp
@@ -4,6 +4,7 @@
 
 #include <Databases/MySQL/MaterializedMySQLSyncThread.h>
 #include <Databases/MySQL/tryParseTableIDFromDDL.h>
+#include <Databases/MySQL/tryQuoteUnrecognizedTokens.h>
 #include <cstdlib>
 #include <random>
 #include <string_view>
@@ -342,9 +343,8 @@ static inline String rewriteMysqlQueryColumn(mysqlxx::Pool::Entry & connection,
                     { std::make_shared<DataTypeString>(),   "column_type" }
             };
 
-    const String & query =  "SELECT COLUMN_NAME AS column_name, COLUMN_TYPE AS column_type FROM INFORMATION_SCHEMA.COLUMNS"
-                            " WHERE TABLE_SCHEMA = '"  + backQuoteIfNeed(database_name) +
-                            "' AND TABLE_NAME = '" + backQuoteIfNeed(table_name) +  "' ORDER BY ORDINAL_POSITION";
+    String query = "SELECT COLUMN_NAME AS column_name, COLUMN_TYPE AS column_type FROM INFORMATION_SCHEMA.COLUMNS"
+                   " WHERE TABLE_SCHEMA = '" + database_name + "' AND TABLE_NAME = '" + table_name + "' ORDER BY ORDINAL_POSITION";
 
     StreamSettings mysql_input_stream_settings(global_settings, false, true);
     auto mysql_source = std::make_unique<MySQLSource>(connection, query, tables_columns_sample_block, mysql_input_stream_settings);
@@ -812,6 +812,7 @@ void MaterializedMySQLSyncThread::executeDDLAtomic(const QueryEvent & query_even
         CurrentThread::QueryScope query_scope(query_context);
 
         String query = query_event.query;
+        tryQuoteUnrecognizedTokens(query, query);
         if (!materialized_tables_list.empty())
         {
             auto table_id = tryParseTableIDFromDDL(query, query_event.schema);
diff --git a/src/Databases/MySQL/tests/gtest_try_quote_unrecognized_tokens.cpp b/src/Databases/MySQL/tests/gtest_try_quote_unrecognized_tokens.cpp
new file mode 100644
index 00000000000..9c76deb2712
--- /dev/null
+++ b/src/Databases/MySQL/tests/gtest_try_quote_unrecognized_tokens.cpp
@@ -0,0 +1,289 @@
+#include <gtest/gtest.h>
+
+#include <Databases/MySQL/tryQuoteUnrecognizedTokens.h>
+
+using namespace DB;
+
+struct TestCase
+{
+    String query;
+    String res;
+    bool ok;
+
+    TestCase(
+        const String & query_,
+        const String & res_,
+        bool ok_)
+        : query(query_)
+        , res(res_)
+        , ok(ok_)
+    {
+    }
+};
+
+std::ostream & operator<<(std::ostream & ostr, const TestCase & test_case)
+{
+    return ostr << '"' << test_case.query << "\" -> \"" << test_case.res << "\" ok:" << test_case.ok;
+}
+
+class QuoteUnrecognizedTokensTest : public ::testing::TestWithParam<TestCase>
+{
+};
+
+TEST_P(QuoteUnrecognizedTokensTest, escape)
+{
+    const auto & [query, expected, ok] = GetParam();
+    String actual;
+    bool res = tryQuoteUnrecognizedTokens(query, actual);
+    EXPECT_EQ(ok, res);
+    EXPECT_EQ(expected, actual);
+}
+
+INSTANTIATE_TEST_SUITE_P(MaterializedMySQL, QuoteUnrecognizedTokensTest, ::testing::ValuesIn(std::initializer_list<TestCase>{
+    {
+        "",
+        "",
+        false
+    },
+    {
+        "test '\"`",
+        "",
+        false
+    },
+    {
+        "SELECT * FROM db.`table`",
+        "",
+        false
+    },
+    {
+        "道渠",
+        "`道渠`",
+        true
+    },
+    {
+        "道",
+        "`道`",
+        true
+    },
+    {
+        "道道(skip) 道(",
+        "`道道`(skip) `道`(",
+        true
+    },
+    {
+        "`道渠`",
+        "",
+        false
+    },
+    {
+        "'道'",
+        "",
+        false
+    },
+    {
+        "\"道\"",
+        "",
+        false
+    },
+    {
+        "` 道 test 渠 `",
+        "",
+        false
+    },
+    {
+        "skip 道 skip 123",
+        "skip `道` skip 123",
+        true
+    },
+    {
+        "skip 123 `道` skip",
+        "",
+        false
+    },
+    {
+        "skip `道 skip 123",
+        "",
+        false
+    },
+    {
+        "skip test道 skip",
+        "skip `test道` skip",
+        true
+    },
+    {
+        "test道2test",
+        "`test道2test`",
+        true
+    },
+    {
+        "skip test道2test 123",
+        "skip `test道2test` 123",
+        true
+    },
+    {
+        "skip 您a您a您a a您a您a您a 1您2您3您4 skip",
+        "skip `您a您a您a` `a您a您a您a` `1您2您3您4` skip",
+        true
+    },
+    {
+        "skip 您a 您a您a b您2您c您4 skip",
+        "skip `您a` `您a您a` `b您2您c您4` skip",
+        true
+    },
+    {
+        "123您a skip 56_您a 您a2 b_您2_您c123您_a4 skip",
+        "`123您a` skip `56_您a` `您a2` `b_您2_您c123您_a4` skip",
+        true
+    },
+    {
+        "_您_ 123 skip 56_您_您_您_您_您_您_您_您_您_a 您a2 abc 123_您_您_321 a1b2c3 aaaaa您您_a4 skip",
+        "`_您_` 123 skip `56_您_您_您_您_您_您_您_您_您_a` `您a2` abc `123_您_您_321` a1b2c3 `aaaaa您您_a4` skip",
+        true
+    },
+    {
+        "TABLE 您2 您(",
+        "TABLE `您2` `您`(",
+        true
+    },
+    {
+        "TABLE 您.a您2(日2日2 INT",
+        "TABLE `您`.`a您2`(`日2日2` INT",
+        true
+    },
+    {
+        "TABLE 您$.a_您2a_($日2日_2 INT, 您Hi好 a您b好c)",
+        "TABLE `您`$.`a_您2a_`($`日2日_2` INT, `您Hi好` `a您b好c`)",
+        true
+    },
+    {
+        "TABLE 您a日.您a您a您a(test INT",
+        "TABLE `您a日`.`您a您a您a`(test INT",
+        true
+    },
+    {
+        "TABLE 您a日.您a您a您a(Hi您Hi好Hi INT",
+        "TABLE `您a日`.`您a您a您a`(`Hi您Hi好Hi` INT",
+        true
+    },
+    {
+        "--TABLE 您a日.您a您a您a(test INT",
+        "",
+        false
+    },
+    {
+        "--您a日.您a您a您a(\n您Hi好",
+        "--您a日.您a您a您a(\n`您Hi好`",
+        true
+    },
+    {
+        " /* TABLE 您a日.您a您a您a(test INT",
+        "",
+        false
+    },
+    {
+        "/*您a日.您a您a您a(*/\n您Hi好",
+        "/*您a日.您a您a您a(*/\n`您Hi好`",
+        true
+    },
+    {
+        " 您a日.您您aa您a /* 您a日.您a您a您a */ a您a日a.a您您您a",
+        " `您a日`.`您您aa您a` /* 您a日.您a您a您a */ `a您a日a`.`a您您您a`",
+        true
+    },
+    //{ TODO
+    //    "TABLE 您2.您a您a您a(test INT",
+    //    "TABLE `您2`.`您a您a您a`(test INT",
+    //    true
+    //},
+    {
+        "skip 您a您a您a skip",
+        "skip `您a您a您a` skip",
+        true
+    },
+    {
+        "test 您a2您3a您a 4 again",
+        "test `您a2您3a您a` 4 again",
+        true
+    },
+    {
+        "CREATE TABLE db.`道渠`",
+        "",
+        false
+    },
+    {
+        "CREATE TABLE db.`道渠",
+        "",
+        false
+    },
+    {
+        "CREATE TABLE db.道渠",
+        "CREATE TABLE db.`道渠`",
+        true
+    },
+    {
+        "CREATE TABLE db.     道渠",
+        "CREATE TABLE db.     `道渠`",
+        true
+    },
+    {
+        R"sql(
+        CREATE TABLE gb2312.`道渠` (   `id` int NOT NULL,
+            您 INT,
+            道渠 DATETIME,
+            您test INT, test您 INT, test您test INT,
+            道渠test INT, test道渠 INT, test道渠test INT,
+            您_ INT, _您 INT, _您_ INT,
+            您您__ INT, __您您 INT, __您您__ INT,
+            您2 INT, 2您 INT, 2您2 INT,
+            您您22 INT, 22您您 INT, 22您您22 INT,
+            您_2 INT, _2您 INT, _2您_2 INT, _2您2_ INT, 2_您_2 INT,
+            您您__22 INT, __22您您 INT, __22您您__22 INT, __22您您22__ INT, 22__您您__22 INT,
+            您2_ INT, 2_您 INT, 2_您2_ INT,
+            您您22__ INT, 22__您您 INT, 22__您您22__ INT,
+            您_test INT, _test您 INT, _test您_test INT, _test您test_ INT, test_您test_ INT, test_您_test INT,
+            您您_test INT, _test您您 INT, _test您您_test INT, _test您您test_ INT, test_您您test_ INT, test_您您_test INT,
+            您test3 INT, test3您 INT, test3您test3 INT, test3您3test INT,
+            您您test3 INT, test3您您 INT, test3您您test3 INT, test3您您3test  INT,
+            您3test INT, 3test您 INT, 3test您3test INT, 3test您test3 INT,
+            您您3test INT, 3test您您 INT, 3test您您3test INT, 3test您您test3 INT,
+            您_test4 INT, _test4您 INT, _test4您_test4 INT, test4_您_test4 INT, _test4您4test_ INT, _test4您test4_ INT,
+            您您_test4 INT, _test4您您 INT, _test4您您_test4 INT, test4_您您_test4 INT, _test4您您4test_ INT, _test4您您test4_ INT,
+            您_5test INT, _5test您 INT, _5test您_5test INT, 5test_您_test5 INT, _4test您test4_ INT,
+            test_日期     varchar(256), test_道_2     varchar(256) NOT NULL   ,
+            test_道渠您_3
+                BIGINT  NOT NULL,
+            道您3_test INT,
+            PRIMARY KEY (`id`)) ENGINE=InnoDB DEFAULT CHARSET=gb2312;
+        )sql",
+        R"sql(
+        CREATE TABLE gb2312.`道渠` (   `id` int NOT NULL,
+            `您` INT,
+            `道渠` DATETIME,
+            `您test` INT, `test您` INT, `test您test` INT,
+            `道渠test` INT, `test道渠` INT, `test道渠test` INT,
+            `您_` INT, `_您` INT, `_您_` INT,
+            `您您__` INT, `__您您` INT, `__您您__` INT,
+            `您2` INT, `2您` INT, `2您2` INT,
+            `您您22` INT, `22您您` INT, `22您您22` INT,
+            `您_2` INT, `_2您` INT, `_2您_2` INT, `_2您2_` INT, `2_您_2` INT,
+            `您您__22` INT, `__22您您` INT, `__22您您__22` INT, `__22您您22__` INT, `22__您您__22` INT,
+            `您2_` INT, `2_您` INT, `2_您2_` INT,
+            `您您22__` INT, `22__您您` INT, `22__您您22__` INT,
+            `您_test` INT, `_test您` INT, `_test您_test` INT, `_test您test_` INT, `test_您test_` INT, `test_您_test` INT,
+            `您您_test` INT, `_test您您` INT, `_test您您_test` INT, `_test您您test_` INT, `test_您您test_` INT, `test_您您_test` INT,
+            `您test3` INT, `test3您` INT, `test3您test3` INT, `test3您3test` INT,
+            `您您test3` INT, `test3您您` INT, `test3您您test3` INT, `test3您您3test`  INT,
+            `您3test` INT, `3test您` INT, `3test您3test` INT, `3test您test3` INT,
+            `您您3test` INT, `3test您您` INT, `3test您您3test` INT, `3test您您test3` INT,
+            `您_test4` INT, `_test4您` INT, `_test4您_test4` INT, `test4_您_test4` INT, `_test4您4test_` INT, `_test4您test4_` INT,
+            `您您_test4` INT, `_test4您您` INT, `_test4您您_test4` INT, `test4_您您_test4` INT, `_test4您您4test_` INT, `_test4您您test4_` INT,
+            `您_5test` INT, `_5test您` INT, `_5test您_5test` INT, `5test_您_test5` INT, `_4test您test4_` INT,
+            `test_日期`     varchar(256), `test_道_2`     varchar(256) NOT NULL   ,
+            `test_道渠您_3`
+                BIGINT  NOT NULL,
+            `道您3_test` INT,
+            PRIMARY KEY (`id`)) ENGINE=InnoDB DEFAULT CHARSET=gb2312;
+        )sql",
+        true
+    },
+}));
diff --git a/src/Databases/MySQL/tryQuoteUnrecognizedTokens.cpp b/src/Databases/MySQL/tryQuoteUnrecognizedTokens.cpp
new file mode 100644
index 00000000000..cd4603ddaec
--- /dev/null
+++ b/src/Databases/MySQL/tryQuoteUnrecognizedTokens.cpp
@@ -0,0 +1,96 @@
+#include <Databases/MySQL/tryQuoteUnrecognizedTokens.h>
+#include <Parsers/CommonParsers.h>
+#include <Common/quoteString.h>
+
+namespace DB
+{
+
+/// Returns true if `from` starts exactly where `to` ends, i.e. nothing (like whitespaces) was skipped between the two positions
+static bool noWhitespaces(const char * to, const char * from)
+{
+    return to == from;
+}
+
+/// Tells whether a token of this type is quoted as part of an adjacent unrecognized sequence
+static bool isWordOrNumber(TokenType type)
+{
+    return type == TokenType::Number || type == TokenType::BareWord;
+}
+
+/// Appends the not yet copied text preceding the unrecognized sequence, then the back-quoted sequence itself, and moves copy_from past it
+static void quoteLiteral(
+    IParser::Pos & pos,
+    IParser::Pos & pos_prev,
+    const char *& pos_unrecognized,
+    const char *& copy_from,
+    String & rewritten_query)
+{
+    /// A word or number that directly follows the previous token extends the literal, otherwise it ends with the previous token
+    const bool include_current = isWordOrNumber(pos->type) && noWhitespaces(pos_prev->end, pos->begin);
+    const char * literal_end = include_current ? pos->end : pos_prev->end;
+    const String literal(pos_unrecognized, static_cast<size_t>(literal_end - pos_unrecognized));
+    rewritten_query.append(copy_from, pos_unrecognized - copy_from);
+    rewritten_query.append(backQuoteMySQL(literal));
+    copy_from = literal_end;
+}
+
+/// Wraps every sequence the lexer reports as TokenType::Error, together with the words and numbers
+/// adjoining it without whitespaces, in MySQL back quotes. The rest of the query is copied as is.
+/// Returns false and leaves `res` untouched if nothing had to be quoted.
+bool tryQuoteUnrecognizedTokens(const String & query, String & res)
+{
+    Tokens tokens(query.data(), query.data() + query.size());
+    IParser::Pos pos(tokens, 0);
+    String rewritten_query;
+    /// Beginning of the part of the query that is not copied to rewritten_query yet
+    const char * copy_from = query.data();
+    auto pos_prev = pos;
+    /// Beginning of the sequence being collected for quoting, nullptr if there is none
+    const char * pos_unrecognized = nullptr;
+    for (; pos->type != TokenType::EndOfStream; ++pos)
+    {
+        /// Commit quotes if any whitespaces found or the token is not a word
+        const bool separated = !noWhitespaces(pos_prev->end, pos->begin);
+        const bool ends_sequence = pos->type != TokenType::Error && !isWordOrNumber(pos->type);
+        if (pos_unrecognized && (separated || ends_sequence))
+        {
+            quoteLiteral(
+                pos,
+                pos_prev,
+                pos_unrecognized,
+                copy_from,
+                rewritten_query);
+            pos_unrecognized = nullptr;
+        }
+
+        if (pos->type == TokenType::Error && !pos_unrecognized)
+        {
+            /// First error token of a sequence: a word or number glued to it from the left is quoted too
+            const bool glued = isWordOrNumber(pos_prev->type) && noWhitespaces(pos_prev->end, pos->begin);
+            pos_unrecognized = glued ? pos_prev->begin : pos->begin;
+        }
+        pos_prev = pos;
+    }
+
+    /// There was EndOfStream but not committed unrecognized token
+    if (pos_unrecognized)
+    {
+        quoteLiteral(
+            pos,
+            pos_prev,
+            pos_unrecognized,
+            copy_from,
+            rewritten_query);
+    }
+
+    /// If no Errors found
+    if (copy_from == query.data())
+        return false;
+
+    /// Copy the tail of the query that follows the last quoted sequence
+    rewritten_query.append(copy_from, static_cast<size_t>(pos->end - copy_from));
+    res.swap(rewritten_query); /// hand the buffer over instead of copying it
+    return true;
+}
+
+}
diff --git a/src/Databases/MySQL/tryQuoteUnrecognizedTokens.h b/src/Databases/MySQL/tryQuoteUnrecognizedTokens.h
new file mode 100644
index 00000000000..582a297c485
--- /dev/null
+++ b/src/Databases/MySQL/tryQuoteUnrecognizedTokens.h
@@ -0,0 +1,10 @@
+#pragma once
+
+#include <base/types.h>
+
+namespace DB
+{
+
+bool tryQuoteUnrecognizedTokens(const String & query, String & res);
+
+}
diff --git a/src/Storages/StorageMySQL.cpp b/src/Storages/StorageMySQL.cpp
index 3e928c3a811..b0a220eb1d2 100644
--- a/src/Storages/StorageMySQL.cpp
+++ b/src/Storages/StorageMySQL.cpp
@@ -19,6 +19,7 @@
 #include <Processors/Sinks/SinkToStorage.h>
 #include <QueryPipeline/Pipe.h>
 #include <Common/parseRemoteDescription.h>
+#include <Common/quoteString.h>
 #include <Common/logger_useful.h>
 #include <Storages/NamedCollectionsHelpers.h>
 #include <Databases/MySQL/FetchTablesColumnsList.h>
@@ -34,16 +35,6 @@ namespace ErrorCodes
     extern const int UNKNOWN_TABLE;
 }
 
-static String backQuoteMySQL(const String & x)
-{
-    String res(x.size(), '\0');
-    {
-        WriteBufferFromString wb(res);
-        writeBackQuotedStringMySQL(x, wb);
-    }
-    return res;
-}
-
 StorageMySQL::StorageMySQL(
     const StorageID & table_id_,
     mysqlxx::PoolWithFailover && pool_,
diff --git a/tests/integration/test_materialized_mysql_database/materialized_with_ddl.py b/tests/integration/test_materialized_mysql_database/materialized_with_ddl.py
index c97c3e5e2a8..9130ccc359c 100644
--- a/tests/integration/test_materialized_mysql_database/materialized_with_ddl.py
+++ b/tests/integration/test_materialized_mysql_database/materialized_with_ddl.py
@@ -1581,6 +1581,128 @@ def utf8mb4_test(clickhouse_node, mysql_node, service_name):
     mysql_node.query("DROP DATABASE utf8mb4_test")
 
 
+def utf8mb4_column_test(clickhouse_node, mysql_node, service_name):
+    db = "utf8mb4_column_test"
+    mysql_node.query(f"DROP DATABASE IF EXISTS {db}")
+    clickhouse_node.query(f"DROP DATABASE IF EXISTS {db}")
+    mysql_node.query(f"CREATE DATABASE {db}")
+
+    # Full sync: tables with a non-ASCII column name (unquoted and quoted) that exist before the MaterializedMySQL database is created
+    mysql_node.query(f"CREATE TABLE {db}.unquoted (id INT primary key, 日期 DATETIME)")
+    mysql_node.query(f"CREATE TABLE {db}.quoted (id INT primary key, `日期` DATETIME)")
+    mysql_node.query(f"INSERT INTO {db}.unquoted VALUES(1, now())")
+    mysql_node.query(f"INSERT INTO {db}.quoted VALUES(1, now())")
+    clickhouse_node.query(
+        f"CREATE DATABASE {db} ENGINE = MaterializedMySQL('{service_name}:3306', '{db}', 'root', 'clickhouse')"
+    )
+
+    # Full sync replicates unquoted column names as well, since it uses SHOW CREATE TABLE,
+    # which returns quoted column names
+    check_query(
+        clickhouse_node,
+        f"/* expect: quoted unquoted */ SHOW TABLES FROM {db}",
+        "quoted\nunquoted\n",
+    )
+    check_query(
+        clickhouse_node,
+        f"/* expect: 1 */ SELECT COUNT() FROM {db}.unquoted",
+        "1\n",
+    )
+    check_query(
+        clickhouse_node,
+        f"/* expect: 1 */ SELECT COUNT() FROM {db}.quoted",
+        "1\n",
+    )
+
+    # Incremental sync: tables and rows created after the MaterializedMySQL database already exists
+    mysql_node.query(
+        f"CREATE TABLE {db}.unquoted_new (id INT primary key, 日期 DATETIME)"
+    )
+    mysql_node.query(
+        f"CREATE TABLE {db}.quoted_new (id INT primary key, `日期` DATETIME)"
+    )
+    mysql_node.query(f"INSERT INTO {db}.unquoted_new VALUES(1, now())")
+    mysql_node.query(f"INSERT INTO {db}.quoted_new VALUES(1, now())")
+    mysql_node.query(f"INSERT INTO {db}.unquoted VALUES(2, now())")
+    mysql_node.query(f"INSERT INTO {db}.quoted VALUES(2, now())")
+    check_query(
+        clickhouse_node,
+        f"/* expect: 2 */ SELECT COUNT() FROM {db}.quoted",
+        "2\n",
+    )
+    check_query(
+        clickhouse_node,
+        f"/* expect: 1 */ SELECT COUNT() FROM {db}.quoted_new",
+        "1\n",
+    )
+    check_query(
+        clickhouse_node,
+        f"/* expect: 2 */ SELECT COUNT() FROM {db}.unquoted",
+        "2\n",
+    )
+    check_query(
+        clickhouse_node,
+        f"/* expect: 1 */ SELECT COUNT() FROM {db}.unquoted_new",
+        "1\n",
+    )
+
+    clickhouse_node.query(f"DROP DATABASE IF EXISTS `{db}`")
+    mysql_node.query(f"DROP DATABASE IF EXISTS `{db}`")
+
+
+def utf8mb4_name_test(clickhouse_node, mysql_node, service_name):
+    db = "您Hi您"  # non-ASCII database name
+    table = "日期"  # non-ASCII name, used below both as a table name and as a column name
+    mysql_node.query(f"DROP DATABASE IF EXISTS `{db}`")
+    clickhouse_node.query(f"DROP DATABASE IF EXISTS `{db}`")
+    mysql_node.query(f"CREATE DATABASE `{db}`")
+    mysql_node.query(
+        f"CREATE TABLE `{db}`.`{table}` (id INT(11) NOT NULL PRIMARY KEY, `{table}` DATETIME) ENGINE=InnoDB DEFAULT CHARACTER SET utf8mb4"
+    )
+    mysql_node.query(f"INSERT INTO `{db}`.`{table}` VALUES(1, now())")
+    mysql_node.query(
+        f"CREATE TABLE {db}.{table}_unquoted (id INT(11) NOT NULL PRIMARY KEY, {table} DATETIME) ENGINE=InnoDB DEFAULT CHARACTER SET utf8mb4"
+    )
+    mysql_node.query(f"INSERT INTO {db}.{table}_unquoted VALUES(1, now())")
+    clickhouse_node.query(
+        f"CREATE DATABASE `{db}` ENGINE = MaterializedMySQL('{service_name}:3306', '{db}', 'root', 'clickhouse')"
+    )
+    check_query(
+        clickhouse_node,
+        f"/* expect: 1 */ SELECT COUNT() FROM `{db}`.`{table}`",
+        "1\n",
+    )
+    check_query(
+        clickhouse_node,
+        f"/* expect: 1 */ SELECT COUNT() FROM `{db}`.`{table}_unquoted`",
+        "1\n",
+    )
+
+    # Incremental sync: tables created after the MaterializedMySQL database already exists
+    mysql_node.query(
+        f"CREATE TABLE `{db}`.`{table}2` (id INT(11) NOT NULL PRIMARY KEY, `{table}` DATETIME) ENGINE=InnoDB DEFAULT CHARACTER SET utf8mb4"
+    )
+    mysql_node.query(f"INSERT INTO `{db}`.`{table}2` VALUES(1, now())")
+    check_query(
+        clickhouse_node,
+        f"/* expect: 1 */ SELECT COUNT() FROM `{db}`.`{table}2`",
+        "1\n",
+    )
+
+    mysql_node.query(
+        f"CREATE TABLE {db}.{table}2_unquoted (id INT(11) NOT NULL PRIMARY KEY, {table} DATETIME) ENGINE=InnoDB DEFAULT CHARACTER SET utf8mb4"
+    )
+    mysql_node.query(f"INSERT INTO {db}.{table}2_unquoted VALUES(1, now())")
+    check_query(
+        clickhouse_node,
+        f"/* expect: 1 */ SELECT COUNT() FROM `{db}`.`{table}2_unquoted`",
+        "1\n",
+    )
+
+    clickhouse_node.query(f"DROP DATABASE IF EXISTS `{db}`")
+    mysql_node.query(f"DROP DATABASE IF EXISTS `{db}`")
+
+
 def system_parts_test(clickhouse_node, mysql_node, service_name):
     mysql_node.query("DROP DATABASE IF EXISTS system_parts_test")
     clickhouse_node.query("DROP DATABASE IF EXISTS system_parts_test")
diff --git a/tests/integration/test_materialized_mysql_database/test.py b/tests/integration/test_materialized_mysql_database/test.py
index 32c1da8a2bd..e31ef70b4ad 100644
--- a/tests/integration/test_materialized_mysql_database/test.py
+++ b/tests/integration/test_materialized_mysql_database/test.py
@@ -381,6 +381,12 @@ def test_utf8mb4(
 ):
     materialized_with_ddl.utf8mb4_test(clickhouse_node, started_mysql_5_7, "mysql57")
     materialized_with_ddl.utf8mb4_test(clickhouse_node, started_mysql_8_0, "mysql80")
+    materialized_with_ddl.utf8mb4_column_test(
+        clickhouse_node, started_mysql_8_0, "mysql80"
+    )
+    materialized_with_ddl.utf8mb4_name_test(
+        clickhouse_node, started_mysql_8_0, "mysql80"
+    )
 
 
 def test_system_parts_table(started_cluster, started_mysql_8_0, clickhouse_node):

From 3710c7238d9eaf0328170bafb03eb4b15ea5d67c Mon Sep 17 00:00:00 2001
From: Antonio Andelic <antonio@clickhouse.com>
Date: Mon, 24 Jul 2023 09:19:06 +0000
Subject: [PATCH 233/242] Fix test_throttling

---
 tests/integration/test_throttling/test.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/integration/test_throttling/test.py b/tests/integration/test_throttling/test.py
index ff8e7154d0d..2b5e9312a4c 100644
--- a/tests/integration/test_throttling/test.py
+++ b/tests/integration/test_throttling/test.py
@@ -114,7 +114,7 @@ def node_update_config(mode, setting, value=None):
 
 
 def assert_took(took, should_took):
-    assert took >= should_took[0] * 0.9 and took < should_took[1]
+    assert took >= should_took[0] * 0.85 and took < should_took[1]
 
 
 @pytest.mark.parametrize(

From 5da6c99f6df90ae5a8dde59f9cccce8cee48fc61 Mon Sep 17 00:00:00 2001
From: Antonio Andelic <antonio2368@users.noreply.github.com>
Date: Mon, 24 Jul 2023 12:02:27 +0200
Subject: [PATCH 234/242] Add comment

---
 tests/integration/test_throttling/test.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tests/integration/test_throttling/test.py b/tests/integration/test_throttling/test.py
index 2b5e9312a4c..62640394a85 100644
--- a/tests/integration/test_throttling/test.py
+++ b/tests/integration/test_throttling/test.py
@@ -114,6 +114,9 @@ def node_update_config(mode, setting, value=None):
 
 
 def assert_took(took, should_took):
+    # we need to decrease the lower limit because the server limits could
+    # be enforced by throttling some server background IO instead of query IO
+    # and we have no control over it
     assert took >= should_took[0] * 0.85 and took < should_took[1]
 
 
From df5ff1383c5c6f7e24cb6933246fc04cf5dfe702 Mon Sep 17 00:00:00 2001
From: Nikita Taranov <nikita.taranov@clickhouse.com>
Date: Mon, 24 Jul 2023 14:57:05 +0200
Subject: [PATCH 235/242] Fix settings not applied for explain query when
 format provided (#51859)

---
 src/Interpreters/InterpreterSetQuery.cpp       |  3 +++
 ..._explain_settings_not_applied_bug.reference | 11 +++++++++++
 .../02798_explain_settings_not_applied_bug.sql | 18 ++++++++++++++++++
 3 files changed, 32 insertions(+)
 create mode 100644 tests/queries/0_stateless/02798_explain_settings_not_applied_bug.reference
 create mode 100644 tests/queries/0_stateless/02798_explain_settings_not_applied_bug.sql

diff --git a/src/Interpreters/InterpreterSetQuery.cpp b/src/Interpreters/InterpreterSetQuery.cpp
index 6db57a4f950..e9118b747e5 100644
--- a/src/Interpreters/InterpreterSetQuery.cpp
+++ b/src/Interpreters/InterpreterSetQuery.cpp
@@ -65,6 +65,9 @@ void InterpreterSetQuery::applySettingsFromQuery(const ASTPtr & ast, ContextMuta
     }
     else if (const auto * explain_query = ast->as<ASTExplainQuery>())
     {
+        if (explain_query->settings_ast)
+            InterpreterSetQuery(explain_query->settings_ast, context_).executeForCurrentContext();
+
         applySettingsFromQuery(explain_query->getExplainedQuery(), context_);
     }
     else if (const auto * query_with_output = dynamic_cast<const ASTQueryWithOutput *>(ast.get()))
diff --git a/tests/queries/0_stateless/02798_explain_settings_not_applied_bug.reference b/tests/queries/0_stateless/02798_explain_settings_not_applied_bug.reference
new file mode 100644
index 00000000000..6fc36a0ba01
--- /dev/null
+++ b/tests/queries/0_stateless/02798_explain_settings_not_applied_bug.reference
@@ -0,0 +1,11 @@
+ [1mexplain[0m                        
+
+ (Expression)                    
+ ExpressionTransform             
+   (Aggregating)                 
+   FinalizeAggregatedTransform   
+     AggregatingInOrderTransform 
+       (Expression)              
+       ExpressionTransform       
+         (ReadFromMergeTree)     
+         MergeTreeInOrder 0 → 1  
diff --git a/tests/queries/0_stateless/02798_explain_settings_not_applied_bug.sql b/tests/queries/0_stateless/02798_explain_settings_not_applied_bug.sql
new file mode 100644
index 00000000000..76f2129abfa
--- /dev/null
+++ b/tests/queries/0_stateless/02798_explain_settings_not_applied_bug.sql
@@ -0,0 +1,18 @@
+SET read_in_order_two_level_merge_threshold=1000000;
+
+DROP TABLE IF EXISTS t;
+CREATE TABLE t(a UInt64)
+ENGINE = MergeTree
+ORDER BY a;
+
+INSERT INTO t SELECT * FROM numbers_mt(1e3);
+OPTIMIZE TABLE t FINAL;
+
+EXPLAIN PIPELINE
+SELECT a
+FROM t
+GROUP BY a
+FORMAT PrettySpace
+SETTINGS optimize_aggregation_in_order = 1;
+
+DROP TABLE t;

From c7239c64ea36a6994cd88d34edc3774243472a68 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= <git@rmr.ninja>
Date: Mon, 24 Jul 2023 15:16:44 +0200
Subject: [PATCH 236/242] Remove unused code

---
 src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp b/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp
index 48adf36e678..3eba9a9de24 100644
--- a/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp
+++ b/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp
@@ -145,9 +145,6 @@ bool IMergeTreeSelectAlgorithm::getNewTask()
 
 ChunkAndProgress IMergeTreeSelectAlgorithm::read()
 {
-    size_t num_read_rows = 0;
-    size_t num_read_bytes = 0;
-
     while (!is_cancelled)
     {
         try
@@ -178,10 +175,6 @@ ChunkAndProgress IMergeTreeSelectAlgorithm::read()
                 ordered_columns.push_back(res.block.getByName(name).column);
             }
 
-            /// Account a progress from previous empty chunks.
-            res.num_read_rows += num_read_rows;
-            res.num_read_bytes += num_read_bytes;
-
             return ChunkAndProgress{
                 .chunk = Chunk(ordered_columns, res.row_count),
                 .num_read_rows = res.num_read_rows,
@@ -194,7 +187,7 @@ ChunkAndProgress IMergeTreeSelectAlgorithm::read()
         }
     }
 
-    return {Chunk(), num_read_rows, num_read_bytes, true};
+    return {Chunk(), 0, 0, true};
 }
 
 void IMergeTreeSelectAlgorithm::initializeMergeTreeReadersForCurrentTask(

From c6e6fd761317662c05532d695c20be72f8e847d2 Mon Sep 17 00:00:00 2001
From: Nikita Taranov <nikita.taranov@clickhouse.com>
Date: Mon, 24 Jul 2023 15:58:21 +0200
Subject: [PATCH 237/242] Shard `OpenedFileCache` to avoid lock contention
 (#51341)

* shard OpenedFileCache to avoid lock contention

* Update OpenedFileCache.h

* fix build

---------

Co-authored-by: Alexey Milovidov <milovidov@clickhouse.com>
---
 src/Common/ProfileEvents.cpp |   1 +
 src/IO/OpenedFileCache.h     | 109 +++++++++++++++++++++--------------
 2 files changed, 68 insertions(+), 42 deletions(-)

diff --git a/src/Common/ProfileEvents.cpp b/src/Common/ProfileEvents.cpp
index 4a656e38edf..f18a67fa565 100644
--- a/src/Common/ProfileEvents.cpp
+++ b/src/Common/ProfileEvents.cpp
@@ -45,6 +45,7 @@
     M(MMappedFileCacheMisses, "Number of times a file has not been found in the MMap cache (for the 'mmap' read_method), so we had to mmap it again.") \
     M(OpenedFileCacheHits, "Number of times a file has been found in the opened file cache, so we didn't have to open it again.") \
     M(OpenedFileCacheMisses, "Number of times a file has been found in the opened file cache, so we had to open it again.") \
+    M(OpenedFileCacheMicroseconds, "Amount of time spent executing OpenedFileCache methods.") \
     M(AIOWrite, "Number of writes with Linux or FreeBSD AIO interface") \
     M(AIOWriteBytes, "Number of bytes written with Linux or FreeBSD AIO interface") \
     M(AIORead, "Number of reads with Linux or FreeBSD AIO interface") \
diff --git a/src/IO/OpenedFileCache.h b/src/IO/OpenedFileCache.h
index 61e502a494b..2cecc675af7 100644
--- a/src/IO/OpenedFileCache.h
+++ b/src/IO/OpenedFileCache.h
@@ -4,14 +4,18 @@
 #include <mutex>
 
 #include <Core/Types.h>
-#include <Common/ProfileEvents.h>
 #include <IO/OpenedFile.h>
+#include <Common/ElapsedTimeProfileEventIncrement.h>
+#include <Common/ProfileEvents.h>
+
+#include <city.h>
 
 
 namespace ProfileEvents
 {
     extern const Event OpenedFileCacheHits;
     extern const Event OpenedFileCacheMisses;
+    extern const Event OpenedFileCacheMicroseconds;
 }
 
 namespace DB
@@ -26,57 +30,79 @@ namespace DB
   */
 class OpenedFileCache
 {
-private:
-    using Key = std::pair<std::string /* path */, int /* flags */>;
+    class OpenedFileMap
+    {
+        using Key = std::pair<std::string /* path */, int /* flags */>;
 
-    using OpenedFileWeakPtr = std::weak_ptr<OpenedFile>;
-    using Files = std::map<Key, OpenedFileWeakPtr>;
+        using OpenedFileWeakPtr = std::weak_ptr<OpenedFile>;
+        using Files = std::map<Key, OpenedFileWeakPtr>;
 
-    Files files;
-    std::mutex mutex;
+        Files files;
+        std::mutex mutex;
+
+    public:
+        using OpenedFilePtr = std::shared_ptr<OpenedFile>;
+
+        OpenedFilePtr get(const std::string & path, int flags)
+        {
+            Key key(path, flags);
+
+            std::lock_guard lock(mutex);
+
+            auto [it, inserted] = files.emplace(key, OpenedFilePtr{});
+            if (!inserted)
+            {
+                if (auto res = it->second.lock())
+                {
+                    ProfileEvents::increment(ProfileEvents::OpenedFileCacheHits);
+                    return res;
+                }
+            }
+            ProfileEvents::increment(ProfileEvents::OpenedFileCacheMisses);
+
+            OpenedFilePtr res
+            {
+                new OpenedFile(path, flags),
+                [key, this](auto ptr)
+                {
+                    {
+                        std::lock_guard another_lock(mutex);
+                        files.erase(key);
+                    }
+                    delete ptr;
+                }
+            };
+
+            it->second = res;
+            return res;
+        }
+
+        void remove(const std::string & path, int flags)
+        {
+            Key key(path, flags);
+            std::lock_guard lock(mutex);
+            files.erase(key);
+        }
+    };
+
+    static constexpr size_t buckets = 1024;
+    std::vector<OpenedFileMap> impls{buckets};
 
 public:
-    using OpenedFilePtr = std::shared_ptr<OpenedFile>;
+    using OpenedFilePtr = OpenedFileMap::OpenedFilePtr;
 
     OpenedFilePtr get(const std::string & path, int flags)
     {
-        Key key(path, flags);
-
-        std::lock_guard lock(mutex);
-
-        auto [it, inserted] = files.emplace(key, OpenedFilePtr{});
-        if (!inserted)
-        {
-            if (auto res = it->second.lock())
-            {
-                ProfileEvents::increment(ProfileEvents::OpenedFileCacheHits);
-                return res;
-            }
-        }
-        ProfileEvents::increment(ProfileEvents::OpenedFileCacheMisses);
-
-        OpenedFilePtr res
-        {
-            new OpenedFile(path, flags),
-            [key, this](auto ptr)
-            {
-                {
-                    std::lock_guard another_lock(mutex);
-                    files.erase(key);
-                }
-                delete ptr;
-            }
-        };
-
-        it->second = res;
-        return res;
+        ProfileEventTimeIncrement<Microseconds> watch(ProfileEvents::OpenedFileCacheMicroseconds);
+        const auto bucket = CityHash_v1_0_2::CityHash64(path.data(), path.length()) % buckets;
+        return impls[bucket].get(path, flags);
     }
 
     void remove(const std::string & path, int flags)
     {
-        Key key(path, flags);
-        std::lock_guard lock(mutex);
-        files.erase(key);
+        ProfileEventTimeIncrement<Microseconds> watch(ProfileEvents::OpenedFileCacheMicroseconds);
+        const auto bucket = CityHash_v1_0_2::CityHash64(path.data(), path.length()) % buckets;
+        impls[bucket].remove(path, flags);
     }
 
     static OpenedFileCache & instance()
@@ -87,5 +113,4 @@ public:
 };
 
 using OpenedFileCachePtr = std::shared_ptr<OpenedFileCache>;
-
 }

From f067f8c46d2aec217c3f835441ca1a2a281c72fd Mon Sep 17 00:00:00 2001
From: Igor Nikonov <igor@clickhouse.com>
Date: Mon, 24 Jul 2023 15:37:16 +0000
Subject: [PATCH 238/242] Make 01951_distributed_push_down_limit analyzer
 agnostic

---
 tests/analyzer_tech_debt.txt                  |  1 -
 ...1951_distributed_push_down_limit.reference | 32 +++++++++----------
 .../01951_distributed_push_down_limit.sql     |  4 +--
 3 files changed, 18 insertions(+), 19 deletions(-)

diff --git a/tests/analyzer_tech_debt.txt b/tests/analyzer_tech_debt.txt
index b746d1610a4..1d56b2c3a71 100644
--- a/tests/analyzer_tech_debt.txt
+++ b/tests/analyzer_tech_debt.txt
@@ -72,7 +72,6 @@
 01925_test_storage_merge_aliases
 01930_optimize_skip_unused_shards_rewrite_in
 01947_mv_subquery
-01951_distributed_push_down_limit
 01952_optimize_distributed_group_by_sharding_key
 02000_join_on_const
 02001_shard_num_shard_count
diff --git a/tests/queries/0_stateless/01951_distributed_push_down_limit.reference b/tests/queries/0_stateless/01951_distributed_push_down_limit.reference
index b9a7d17e955..d175d31846b 100644
--- a/tests/queries/0_stateless/01951_distributed_push_down_limit.reference
+++ b/tests/queries/0_stateless/01951_distributed_push_down_limit.reference
@@ -1,19 +1,19 @@
 -- { echo }
-explain select * from remote('127.{1,2}', view(select * from numbers(1e6))) order by number limit 10 settings distributed_push_down_limit=0;
-Expression (Projection)
-  Limit (preliminary LIMIT (without OFFSET))
-    Sorting (Merge sorted streams after aggregation stage for ORDER BY)
+explain description=0 select * from remote('127.{1,2}', view(select * from numbers(1e6))) order by number limit 10 settings distributed_push_down_limit=0;
+Expression
+  Limit
+    Sorting
       Union
-        Sorting (Sorting for ORDER BY)
-          Expression ((Before ORDER BY + (Convert VIEW subquery result to VIEW table structure + (Materialize constants after VIEW subquery + (Projection + Before ORDER BY)))))
-            ReadFromStorage (SystemNumbers)
-        ReadFromRemote (Read from remote replica)
-explain select * from remote('127.{1,2}', view(select * from numbers(1e6))) order by number limit 10 settings distributed_push_down_limit=1;
-Expression (Projection)
-  Limit (preliminary LIMIT (without OFFSET))
-    Sorting (Merge sorted streams after aggregation stage for ORDER BY)
+        Sorting
+          Expression
+            ReadFromStorage
+        ReadFromRemote
+explain description=0 select * from remote('127.{1,2}', view(select * from numbers(1e6))) order by number limit 10 settings distributed_push_down_limit=1;
+Expression
+  Limit
+    Sorting
       Union
-        Sorting (Sorting for ORDER BY)
-          Expression ((Before ORDER BY + (Convert VIEW subquery result to VIEW table structure + (Materialize constants after VIEW subquery + (Projection + Before ORDER BY)))))
-            ReadFromStorage (SystemNumbers)
-        ReadFromRemote (Read from remote replica)
+        Sorting
+          Expression
+            ReadFromStorage
+        ReadFromRemote
diff --git a/tests/queries/0_stateless/01951_distributed_push_down_limit.sql b/tests/queries/0_stateless/01951_distributed_push_down_limit.sql
index 184e6321988..aee714a494e 100644
--- a/tests/queries/0_stateless/01951_distributed_push_down_limit.sql
+++ b/tests/queries/0_stateless/01951_distributed_push_down_limit.sql
@@ -3,5 +3,5 @@
 set prefer_localhost_replica = 1;
 
 -- { echo }
-explain select * from remote('127.{1,2}', view(select * from numbers(1e6))) order by number limit 10 settings distributed_push_down_limit=0;
-explain select * from remote('127.{1,2}', view(select * from numbers(1e6))) order by number limit 10 settings distributed_push_down_limit=1;
+explain description=0 select * from remote('127.{1,2}', view(select * from numbers(1e6))) order by number limit 10 settings distributed_push_down_limit=0;
+explain description=0 select * from remote('127.{1,2}', view(select * from numbers(1e6))) order by number limit 10 settings distributed_push_down_limit=1;

From 22a2fa097f3795cb2a483e899482b97f80aa8189 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@clickhouse.com>
Date: Mon, 24 Jul 2023 19:40:02 +0200
Subject: [PATCH 239/242] Improve error messages

---
 src/Functions/GregorianDate.cpp | 2 +-
 src/Functions/parseDateTime.cpp | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/Functions/GregorianDate.cpp b/src/Functions/GregorianDate.cpp
index aaaeeb7339d..f28194781c2 100644
--- a/src/Functions/GregorianDate.cpp
+++ b/src/Functions/GregorianDate.cpp
@@ -125,7 +125,7 @@ void GregorianDate::init(ReadBuffer & in)
     assertEOF(in);
 
     if (month_ < 1 || month_ > 12 || day_of_month_ < 1 || day_of_month_ > monthLength(is_leap_year(year_), month_))
-        throw Exception(ErrorCodes::CANNOT_PARSE_DATE, "Invalid date");
+        throw Exception(ErrorCodes::CANNOT_PARSE_DATE, "Invalid date, out of range (year: {}, month: {}, day_of_month: {}).", year_, month_, day_of_month_);
 }
 
 bool GregorianDate::tryInit(ReadBuffer & in)
diff --git a/src/Functions/parseDateTime.cpp b/src/Functions/parseDateTime.cpp
index c3fbc08c4a9..2381def9151 100644
--- a/src/Functions/parseDateTime.cpp
+++ b/src/Functions/parseDateTime.cpp
@@ -398,7 +398,7 @@ namespace
         static Int32 daysSinceEpochFromDayOfYear(Int32 year_, Int32 day_of_year_)
         {
             if (!isDayOfYearValid(year_, day_of_year_))
-                throw Exception(ErrorCodes::CANNOT_PARSE_DATETIME, "Invalid day of year, year:{} day of year:{}", year_, day_of_year_);
+                throw Exception(ErrorCodes::CANNOT_PARSE_DATETIME, "Invalid day of year, out of range (year: {} day of year: {})", year_, day_of_year_);
 
             Int32 res = daysSinceEpochFromDate(year_, 1, 1);
             res += day_of_year_ - 1;
@@ -408,7 +408,7 @@ namespace
         static Int32 daysSinceEpochFromDate(Int32 year_, Int32 month_, Int32 day_)
         {
             if (!isDateValid(year_, month_, day_))
-                throw Exception(ErrorCodes::CANNOT_PARSE_DATETIME, "Invalid date, year:{} month:{} day:{}", year_, month_, day_);
+                throw Exception(ErrorCodes::CANNOT_PARSE_DATETIME, "Invalid date, out of range (year: {} month: {} day_of_month: {})", year_, month_, day_);
 
             Int32 res = cumulativeYearDays[year_ - 1970];
             res += isLeapYear(year_) ? cumulativeLeapDays[month_ - 1] : cumulativeDays[month_ - 1];

From 654af41431423907fdffed93287e9160f78698b9 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@clickhouse.com>
Date: Mon, 24 Jul 2023 19:45:55 +0200
Subject: [PATCH 240/242] Fix race

---
 src/Functions/transform.cpp | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/src/Functions/transform.cpp b/src/Functions/transform.cpp
index 79168d82c54..e03701327b1 100644
--- a/src/Functions/transform.cpp
+++ b/src/Functions/transform.cpp
@@ -658,13 +658,13 @@ namespace
             std::unique_ptr<StringToIdx> table_string_to_idx;
             std::unique_ptr<AnythingToIdx> table_anything_to_idx;
 
-            bool is_empty = false;
-
             ColumnPtr from_column;
             ColumnPtr to_column;
             ColumnPtr default_column;
 
-            std::atomic<bool> initialized{false};
+            bool is_empty = false;
+            bool initialized = false;
+
             std::mutex mutex;
         };
 
@@ -697,13 +697,12 @@ namespace
         /// Can be called from different threads. It works only on the first call.
         void initialize(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type) const
         {
+            std::lock_guard lock(cache.mutex);
             if (cache.initialized)
                 return;
 
             const DataTypePtr & from_type = arguments[0].type;
 
-            std::lock_guard lock(cache.mutex);
-
             if (from_type->onlyNull())
             {
                 cache.is_empty = true;

From c35da36ff2b78dff5b964774673b8c713aa22e95 Mon Sep 17 00:00:00 2001
From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com>
Date: Mon, 24 Jul 2023 19:50:53 +0200
Subject: [PATCH 241/242] Fix default value

---
 base/poco/Foundation/include/Poco/URI.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/base/poco/Foundation/include/Poco/URI.h b/base/poco/Foundation/include/Poco/URI.h
index f4505147ced..eba8109253d 100644
--- a/base/poco/Foundation/include/Poco/URI.h
+++ b/base/poco/Foundation/include/Poco/URI.h
@@ -57,7 +57,7 @@ public:
     URI();
     /// Creates an empty URI.
 
-    explicit URI(const std::string & uri, bool disable_url_encoding = true);
+    explicit URI(const std::string & uri, bool disable_url_encoding = false);
     /// Parses an URI from the given string. Throws a
     /// SyntaxException if the uri is not valid.
 
@@ -362,7 +362,7 @@ private:
     std::string _query;
     std::string _fragment;
 
-    bool _disable_url_encoding = true;
+    bool _disable_url_encoding = false;
 };
 
 
From 2f99363db0356f146db427934b63e9158b7b9858 Mon Sep 17 00:00:00 2001
From: Alexander Tokmakov <tavplubix@clickhouse.com>
Date: Mon, 24 Jul 2023 20:51:53 +0300
Subject: [PATCH 242/242] Update 02136_scalar_subquery_metrics.sql

---
 tests/queries/0_stateless/02136_scalar_subquery_metrics.sql | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/queries/0_stateless/02136_scalar_subquery_metrics.sql b/tests/queries/0_stateless/02136_scalar_subquery_metrics.sql
index 180610288aa..17ff367a58d 100644
--- a/tests/queries/0_stateless/02136_scalar_subquery_metrics.sql
+++ b/tests/queries/0_stateless/02136_scalar_subquery_metrics.sql
@@ -6,7 +6,7 @@ SELECT '#02136_scalar_subquery_4', (SELECT max(number) FROM numbers(1000)) as n
 SYSTEM FLUSH LOGS;
 SELECT read_rows, query FROM system.query_log
 WHERE
-      event_date > yesterday()
+      event_date >= yesterday()
   AND type = 'QueryFinish'
   AND current_database == currentDatabase()
   AND query LIKE 'SELECT ''#02136_scalar_subquery_%'