From 02f52bfbf2d80c1f6292bea7ad8ebad15412138f Mon Sep 17 00:00:00 2001 From: hexiaoting Date: Thu, 3 Jun 2021 13:30:24 +0800 Subject: [PATCH 001/110] Add type check when create materialized view with to table --- src/Interpreters/InterpreterCreateQuery.cpp | 13 ++++++++++ ...ialized_view_to_table_type_check.reference | 5 ++++ ..._materialized_view_to_table_type_check.sql | 25 +++++++++++++++++++ 3 files changed, 43 insertions(+) create mode 100644 tests/queries/0_stateless/01880_materialized_view_to_table_type_check.reference create mode 100644 tests/queries/0_stateless/01880_materialized_view_to_table_type_check.sql diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 86b810d031e..a341248da3c 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -957,6 +957,19 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create) if (!created) /// Table already exists return {}; + /// Check type compatible for materialized dest table and select columns + if (create.select && create.is_materialized_view && create.to_table_id) + { + StoragePtr table = DatabaseCatalog::instance().getTable({create.database, create.table, create.uuid}, getContext()); + const auto & output_columns = table->getInMemoryMetadataPtr()->getSampleBlock(); + Block input_columns=InterpreterSelectWithUnionQuery( + create.select->clone(), getContext(),SelectQueryOptions().analyze()).getSampleBlock(); + auto actions_dag = ActionsDAG::makeConvertingActions( + input_columns.getColumnsWithTypeAndName(), + output_columns.getColumnsWithTypeAndName(), + ActionsDAG::MatchColumnsMode::Position); + } + return fillTableIfNeeded(create); } diff --git a/tests/queries/0_stateless/01880_materialized_view_to_table_type_check.reference b/tests/queries/0_stateless/01880_materialized_view_to_table_type_check.reference new file mode 100644 index 00000000000..5498a268179 --- /dev/null +++ b/tests/queries/0_stateless/01880_materialized_view_to_table_type_check.reference @@ -0,0 +1,5 @@ +----------test--------: +----------test--------: +100 \0\0\0\0\0\0\0 +101 \0\0\0\0\0\0\0 +102 \0\0\0\0\0\0\0 diff --git a/tests/queries/0_stateless/01880_materialized_view_to_table_type_check.sql b/tests/queries/0_stateless/01880_materialized_view_to_table_type_check.sql new file mode 100644 index 00000000000..342ef08bc89 --- /dev/null +++ b/tests/queries/0_stateless/01880_materialized_view_to_table_type_check.sql @@ -0,0 +1,25 @@ +DROP TABLE IF EXISTS test_mv; +DROP TABLE IF EXISTS test; +DROP TABLE IF EXISTS test_input; + +CREATE TABLE test_input(id Int32) ENGINE=MergeTree() order by id; + +CREATE TABLE test(`id` Int32, `pv` AggregateFunction(sum, Int32)) ENGINE = AggregatingMergeTree() ORDER BY id; + +CREATE MATERIALIZED VIEW test_mv to test(`id` Int32, `pv` AggregateFunction(sum, Int32)) as SELECT id, sumState(1) as pv from test_input group by id; -- { serverError 70 } + +DROP VIEW test_mv; + +INSERT INTO test_input SELECT toInt32(number % 1000) AS id FROM numbers(10); +select '----------test--------:'; +select * from test; + +create MATERIALIZED VIEW test_mv to test(`id` Int32, `pv` AggregateFunction(sum, Int32)) as SELECT id, sumState(toInt32(1)) as pv from test_input group by id; +INSERT INTO test_input SELECT toInt32(number % 1000) AS id FROM numbers(100,3); + +select '----------test--------:'; +select * from test; + +DROP TABLE test_mv; +DROP TABLE test; +DROP TABLE test_input; From 4608d1ebde29797a2f34309a3f8c468bf9fdb494 Mon Sep 17 
00:00:00 2001 From: hexiaoting Date: Tue, 8 Jun 2021 12:30:40 +0800 Subject: [PATCH 002/110] Fix error --- src/Interpreters/InterpreterCreateQuery.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index a341248da3c..28230e0cba4 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -962,12 +962,12 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create) { StoragePtr table = DatabaseCatalog::instance().getTable({create.database, create.table, create.uuid}, getContext()); const auto & output_columns = table->getInMemoryMetadataPtr()->getSampleBlock(); - Block input_columns=InterpreterSelectWithUnionQuery( - create.select->clone(), getContext(),SelectQueryOptions().analyze()).getSampleBlock(); + Block input_columns = InterpreterSelectWithUnionQuery( + create.select->clone(), getContext(), SelectQueryOptions().analyze()).getSampleBlock(); auto actions_dag = ActionsDAG::makeConvertingActions( input_columns.getColumnsWithTypeAndName(), output_columns.getColumnsWithTypeAndName(), - ActionsDAG::MatchColumnsMode::Position); + ActionsDAG::MatchColumnsMode::Name); } return fillTableIfNeeded(create); From 21028ea93667f5a81072a6173e452c48cf1c33bd Mon Sep 17 00:00:00 2001 From: hexiaoting Date: Tue, 15 Jun 2021 16:40:34 +0800 Subject: [PATCH 003/110] Add more type checks --- src/Interpreters/InterpreterCreateQuery.cpp | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 28230e0cba4..99315295514 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -960,13 +960,23 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create) /// Check type compatible for materialized dest table and select columns if (create.select && create.is_materialized_view && create.to_table_id) { - StoragePtr table = DatabaseCatalog::instance().getTable({create.database, create.table, create.uuid}, getContext()); - const auto & output_columns = table->getInMemoryMetadataPtr()->getSampleBlock(); + StoragePtr to_table = DatabaseCatalog::instance().getTable({create.to_table_id.database_name, + create.to_table_id.table_name, + create.to_table_id.uuid}, + getContext()); + const auto & to_output_columns = to_table->getInMemoryMetadataPtr()->getSampleBlock(); + StoragePtr view_table = DatabaseCatalog::instance().getTable({create.database, create.table, create.uuid}, getContext()); + const auto & view_output_columns = view_table->getInMemoryMetadataPtr()->getSampleBlock(); + Block input_columns = InterpreterSelectWithUnionQuery( create.select->clone(), getContext(), SelectQueryOptions().analyze()).getSampleBlock(); - auto actions_dag = ActionsDAG::makeConvertingActions( + ActionsDAG::makeConvertingActions( input_columns.getColumnsWithTypeAndName(), - output_columns.getColumnsWithTypeAndName(), + to_output_columns.getColumnsWithTypeAndName(), + ActionsDAG::MatchColumnsMode::Position); + ActionsDAG::makeConvertingActions( + input_columns.getColumnsWithTypeAndName(), + view_output_columns.getColumnsWithTypeAndName(), ActionsDAG::MatchColumnsMode::Name); } From c6f182163ac6db6dd2d122ed752e5412d708c2a0 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Tue, 5 Apr 2022 10:57:39 +0300 Subject: [PATCH 004/110] Require mutations for per-table TTL only when it had been changed 
Before this patch only per-column TTL did not requires mutation if it had not been changed, after per-table TTL will also check whether it had been changed or not. Signed-off-by: Azat Khuzhin --- src/Storages/AlterCommands.cpp | 13 ++++++++--- ...per_table_ttl_mutation_on_change.reference | 22 +++++++++++++++++++ ...02265_per_table_ttl_mutation_on_change.sql | 22 +++++++++++++++++++ 3 files changed, 54 insertions(+), 3 deletions(-) create mode 100644 tests/queries/0_stateless/02265_per_table_ttl_mutation_on_change.reference create mode 100644 tests/queries/0_stateless/02265_per_table_ttl_mutation_on_change.sql diff --git a/src/Storages/AlterCommands.cpp b/src/Storages/AlterCommands.cpp index edd9dad2c02..5132fc9244c 100644 --- a/src/Storages/AlterCommands.cpp +++ b/src/Storages/AlterCommands.cpp @@ -818,22 +818,29 @@ bool AlterCommand::isCommentAlter() const bool AlterCommand::isTTLAlter(const StorageInMemoryMetadata & metadata) const { if (type == MODIFY_TTL) + { + if (!metadata.table_ttl.definition_ast) + return true; + /// If TTL had not been changed, do not require mutations + if (queryToString(metadata.table_ttl.definition_ast) == queryToString(ttl)) + return false; return true; + } if (!ttl || type != MODIFY_COLUMN) return false; - bool ttl_changed = true; + bool column_ttl_changed = true; for (const auto & [name, ttl_ast] : metadata.columns.getColumnTTLs()) { if (name == column_name && queryToString(*ttl) == queryToString(*ttl_ast)) { - ttl_changed = false; + column_ttl_changed = false; break; } } - return ttl_changed; + return column_ttl_changed; } bool AlterCommand::isRemovingProperty() const diff --git a/tests/queries/0_stateless/02265_per_table_ttl_mutation_on_change.reference b/tests/queries/0_stateless/02265_per_table_ttl_mutation_on_change.reference new file mode 100644 index 00000000000..740b4edf189 --- /dev/null +++ b/tests/queries/0_stateless/02265_per_table_ttl_mutation_on_change.reference @@ -0,0 +1,22 @@ +-- { echoOn } +alter table per_table_ttl_02265 modify TTL date + interval 1 month; +select count() from system.mutations where database = currentDatabase() and table = 'per_table_ttl_02265'; +1 +alter table per_table_ttl_02265 modify TTL date + interval 1 month; +select count() from system.mutations where database = currentDatabase() and table = 'per_table_ttl_02265'; +1 +alter table per_table_ttl_02265 modify TTL date + interval 2 month; +select count() from system.mutations where database = currentDatabase() and table = 'per_table_ttl_02265'; +2 +alter table per_table_ttl_02265 modify TTL date + interval 2 month group by key set value = argMax(value, date); +select count() from system.mutations where database = currentDatabase() and table = 'per_table_ttl_02265'; +3 +alter table per_table_ttl_02265 modify TTL date + interval 2 month group by key set value = argMax(value, date); +select count() from system.mutations where database = currentDatabase() and table = 'per_table_ttl_02265'; +3 +alter table per_table_ttl_02265 modify TTL date + interval 2 month recompress codec(ZSTD(17)); +select count() from system.mutations where database = currentDatabase() and table = 'per_table_ttl_02265'; +4 +alter table per_table_ttl_02265 modify TTL date + interval 2 month recompress codec(ZSTD(17)); +select count() from system.mutations where database = currentDatabase() and table = 'per_table_ttl_02265'; +4 diff --git a/tests/queries/0_stateless/02265_per_table_ttl_mutation_on_change.sql b/tests/queries/0_stateless/02265_per_table_ttl_mutation_on_change.sql new file mode 100644 
index 00000000000..53e2e72228a --- /dev/null +++ b/tests/queries/0_stateless/02265_per_table_ttl_mutation_on_change.sql @@ -0,0 +1,22 @@ +drop table if exists per_table_ttl_02265; +create table per_table_ttl_02265 (key Int, date Date, value String) engine=MergeTree() order by key; +insert into per_table_ttl_02265 values (1, today(), '1'); + +-- { echoOn } +alter table per_table_ttl_02265 modify TTL date + interval 1 month; +select count() from system.mutations where database = currentDatabase() and table = 'per_table_ttl_02265'; +alter table per_table_ttl_02265 modify TTL date + interval 1 month; +select count() from system.mutations where database = currentDatabase() and table = 'per_table_ttl_02265'; +alter table per_table_ttl_02265 modify TTL date + interval 2 month; +select count() from system.mutations where database = currentDatabase() and table = 'per_table_ttl_02265'; +alter table per_table_ttl_02265 modify TTL date + interval 2 month group by key set value = argMax(value, date); +select count() from system.mutations where database = currentDatabase() and table = 'per_table_ttl_02265'; +alter table per_table_ttl_02265 modify TTL date + interval 2 month group by key set value = argMax(value, date); +select count() from system.mutations where database = currentDatabase() and table = 'per_table_ttl_02265'; +alter table per_table_ttl_02265 modify TTL date + interval 2 month recompress codec(ZSTD(17)); +select count() from system.mutations where database = currentDatabase() and table = 'per_table_ttl_02265'; +alter table per_table_ttl_02265 modify TTL date + interval 2 month recompress codec(ZSTD(17)); +select count() from system.mutations where database = currentDatabase() and table = 'per_table_ttl_02265'; + +-- { echoOff } +drop table per_table_ttl_02265; From 8ebaf8498951bb9e866aa4f3ee4764e8146c5013 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Tue, 5 Apr 2022 13:39:05 +0300 Subject: [PATCH 005/110] Fix clang-tidy readability-simplify-boolean-expr warning in AlterCommands.cpp I want to make the code more readable before, but clang-tidy is too smart... 
Signed-off-by: Azat Khuzhin --- src/Storages/AlterCommands.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/Storages/AlterCommands.cpp b/src/Storages/AlterCommands.cpp index 5132fc9244c..286f58739f0 100644 --- a/src/Storages/AlterCommands.cpp +++ b/src/Storages/AlterCommands.cpp @@ -822,9 +822,7 @@ bool AlterCommand::isTTLAlter(const StorageInMemoryMetadata & metadata) const if (!metadata.table_ttl.definition_ast) return true; /// If TTL had not been changed, do not require mutations - if (queryToString(metadata.table_ttl.definition_ast) == queryToString(ttl)) - return false; - return true; + return queryToString(metadata.table_ttl.definition_ast) != queryToString(ttl); } if (!ttl || type != MODIFY_COLUMN) From 44bf8ba5681d237daf7721d648954581f2260c33 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Fri, 8 Apr 2022 08:05:04 +0300 Subject: [PATCH 006/110] Remove trailing whitespace in default client_name Signed-off-by: Azat Khuzhin --- src/Interpreters/ClientInfo.cpp | 5 ++++- tests/queries/0_stateless/02270_client_name.reference | 1 + tests/queries/0_stateless/02270_client_name.sql | 3 +++ 3 files changed, 8 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/02270_client_name.reference create mode 100644 tests/queries/0_stateless/02270_client_name.sql diff --git a/src/Interpreters/ClientInfo.cpp b/src/Interpreters/ClientInfo.cpp index 75af25e842e..614a966593f 100644 --- a/src/Interpreters/ClientInfo.cpp +++ b/src/Interpreters/ClientInfo.cpp @@ -193,7 +193,10 @@ void ClientInfo::setInitialQuery() { query_kind = QueryKind::INITIAL_QUERY; fillOSUserHostNameAndVersionInfo(); - client_name = (DBMS_NAME " ") + client_name; + if (client_name.empty()) + client_name = DBMS_NAME; + else + client_name = (DBMS_NAME " ") + client_name; } diff --git a/tests/queries/0_stateless/02270_client_name.reference b/tests/queries/0_stateless/02270_client_name.reference new file mode 100644 index 00000000000..fbb2921010e --- /dev/null +++ b/tests/queries/0_stateless/02270_client_name.reference @@ -0,0 +1 @@ +"ClickHouse" diff --git a/tests/queries/0_stateless/02270_client_name.sql b/tests/queries/0_stateless/02270_client_name.sql new file mode 100644 index 00000000000..8ec01090246 --- /dev/null +++ b/tests/queries/0_stateless/02270_client_name.sql @@ -0,0 +1,3 @@ +select 1 settings log_queries=1, log_queries_min_type='QUERY_FINISH' format Null; +system flush logs; +select client_name from system.query_log where current_database = currentDatabase() and query like 'select 1%' format CSV; From 50a32a74b70fbf7460bef948b14e742513b83020 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Fri, 8 Apr 2022 15:38:35 +0200 Subject: [PATCH 007/110] add thread_id and query_id to zookeeper_log --- src/Common/ZooKeeper/ZooKeeperCommon.h | 2 ++ src/Common/ZooKeeper/ZooKeeperImpl.cpp | 11 +++++++++++ src/Common/ZooKeeper/ZooKeeperImpl.h | 1 + src/Interpreters/ZooKeeperLog.cpp | 4 ++++ src/Interpreters/ZooKeeperLog.h | 2 ++ 5 files changed, 20 insertions(+) diff --git a/src/Common/ZooKeeper/ZooKeeperCommon.h b/src/Common/ZooKeeper/ZooKeeperCommon.h index 80e38dd74d5..e51bea3f7f8 100644 --- a/src/Common/ZooKeeper/ZooKeeperCommon.h +++ b/src/Common/ZooKeeper/ZooKeeperCommon.h @@ -57,6 +57,8 @@ struct ZooKeeperRequest : virtual Request bool restored_from_zookeeper_log = false; UInt64 request_created_time_ns = 0; + UInt64 thread_id = 0; + String query_id; ZooKeeperRequest() = default; ZooKeeperRequest(const ZooKeeperRequest &) = default; diff --git 
a/src/Common/ZooKeeper/ZooKeeperImpl.cpp b/src/Common/ZooKeeper/ZooKeeperImpl.cpp index 935df255843..b8a6e040bc8 100644 --- a/src/Common/ZooKeeper/ZooKeeperImpl.cpp +++ b/src/Common/ZooKeeper/ZooKeeperImpl.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include @@ -1016,6 +1017,11 @@ void ZooKeeper::pushRequest(RequestInfo && info) try { info.time = clock::now(); + if (zk_log) + { + info.request->thread_id = getThreadId(); + info.request->query_id = String(CurrentThread::getQueryId()); + } if (!info.request->xid) { @@ -1269,6 +1275,11 @@ void ZooKeeper::logOperationIfNeeded(const ZooKeeperRequestPtr & request, const elem.event_time = event_time; elem.address = socket_address; elem.session_id = session_id; + if (request) + { + elem.thread_id = request->thread_id; + elem.query_id = request->query_id; + } maybe_zk_log->add(elem); } } diff --git a/src/Common/ZooKeeper/ZooKeeperImpl.h b/src/Common/ZooKeeper/ZooKeeperImpl.h index 58c5947e8ea..faa777c33aa 100644 --- a/src/Common/ZooKeeper/ZooKeeperImpl.h +++ b/src/Common/ZooKeeper/ZooKeeperImpl.h @@ -219,6 +219,7 @@ private: ResponseCallback callback; WatchCallback watch; clock::time_point time; + UInt64 thread_id = 0; }; using RequestsQueue = ConcurrentBoundedQueue; diff --git a/src/Interpreters/ZooKeeperLog.cpp b/src/Interpreters/ZooKeeperLog.cpp index fdcbe430834..2828c3e2fa8 100644 --- a/src/Interpreters/ZooKeeperLog.cpp +++ b/src/Interpreters/ZooKeeperLog.cpp @@ -116,6 +116,8 @@ NamesAndTypesList ZooKeeperLogElement::getNamesAndTypes() {"type", std::move(type_enum)}, {"event_date", std::make_shared()}, {"event_time", std::make_shared(6)}, + {"thread_id", std::make_shared()}, + {"query_id", std::make_shared()}, {"address", DataTypeFactory::instance().get("IPv6")}, {"port", std::make_shared()}, {"session_id", std::make_shared()}, @@ -164,6 +166,8 @@ void ZooKeeperLogElement::appendToBlock(MutableColumns & columns) const auto event_time_seconds = event_time / 1000000; columns[i++]->insert(DateLUT::instance().toDayNum(event_time_seconds).toUnderType()); columns[i++]->insert(event_time); + columns[i++]->insert(thread_id); + columns[i++]->insert(query_id); columns[i++]->insertData(IPv6ToBinary(address.host()).data(), 16); columns[i++]->insert(address.port()); columns[i++]->insert(session_id); diff --git a/src/Interpreters/ZooKeeperLog.h b/src/Interpreters/ZooKeeperLog.h index 284675a7ff5..733c61ba8fb 100644 --- a/src/Interpreters/ZooKeeperLog.h +++ b/src/Interpreters/ZooKeeperLog.h @@ -22,6 +22,8 @@ struct ZooKeeperLogElement Type type = UNKNOWN; Decimal64 event_time = 0; + UInt64 thread_id = 0; + String query_id; Poco::Net::SocketAddress address; Int64 session_id = 0; From 44b221fd22f9d8c0fa6f563d80e1dfafbf6726a8 Mon Sep 17 00:00:00 2001 From: Thom O'Connor Date: Fri, 8 Apr 2022 10:53:08 -0600 Subject: [PATCH 008/110] Updated adopters: added 4 new adopters --- docs/en/introduction/adopters.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/docs/en/introduction/adopters.md b/docs/en/introduction/adopters.md index 98eea85bbfa..e4d60a4fc9e 100644 --- a/docs/en/introduction/adopters.md +++ b/docs/en/introduction/adopters.md @@ -28,6 +28,7 @@ toc_title: Adopters | Badoo | Dating | Timeseries | — | 1.6 mln events/sec (2018) | [Slides in Russian, December 2019](https://presentations.clickhouse.com/meetup38/forecast.pdf) | | Beeline | Telecom | Data Platform | — | — | [Blog post, July 2021](https://habr.com/en/company/beeline/blog/567508/) | | Benocs | Network Telemetry and Analytics | Main Product | — | — | [Slides in 
English, October 2017](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup9/lpm.pdf) | +| Better Stack | Cloud, SaaS | Log Management | - | - | [Official Website](https://betterstack.com/logtail) | | BIGO | Video | Computing Platform | — | — | [Blog Article, August 2020](https://www.programmersought.com/article/44544895251/) | | BiliBili | Video sharing | — | — | — | [Blog post, June 2021](https://chowdera.com/2021/06/20210622012241476b.html) | | Bloomberg | Finance, Media | Monitoring | — | — | [Job opening, September 2021](https://careers.bloomberg.com/job/detail/94913), [slides, May 2018](https://www.slideshare.net/Altinity/http-analytics-for-6m-requests-per-second-using-clickhouse-by-alexander-bocharov) | @@ -112,7 +113,7 @@ toc_title: Adopters | NLMK | Steel | Monitoring | — | — | [Article in Russian, Jan 2022](https://habr.com/en/company/nlmk/blog/645943/) | | NOC Project | Network Monitoring | Analytics | Main Product | — | [Official Website](https://getnoc.com/features/big-data/) | | Noction | Network Technology | Main Product | — | — | [Official Website](https://www.noction.com/news/irp-3-11-remote-triggered-blackholing-capability) -| ntop | Network Monitoning | Monitoring | — | — | [Official website, Jan 2022](https://www.ntop.org/ntop/historical-traffic-analysis-at-scale-using-clickhouse-with-ntopng/) | +| ntop | Network Monitoning | Monitoring | — | — | [Official website, January 2022](https://www.ntop.org/ntop/historical-traffic-analysis-at-scale-using-clickhouse-with-ntopng/) | | Nuna Inc. | Health Data Analytics | — | — | — | [Talk in English, July 2020](https://youtu.be/GMiXCMFDMow?t=170) | | Ok.ru | Social Network | — | 72 servers | 810 TB compressed, 50bn rows/day, 1.5 TB/day | [SmartData conference, October 2021](https://assets.ctfassets.net/oxjq45e8ilak/4JPHkbJenLgZhBGGyyonFP/57472ec6987003ec4078d0941740703b/____________________ClickHouse_______________________.pdf) | | Omnicomm | Transportation Monitoring | — | — | — | [Facebook post, October 2021](https://www.facebook.com/OmnicommTeam/posts/2824479777774500) | @@ -123,6 +124,7 @@ toc_title: Adopters | Panelbear | Analytics | Monitoring and Analytics | — | — | [Tech Stack, November 2020](https://panelbear.com/blog/tech-stack/) | | Percent 百分点 | Analytics | Main Product | — | — | [Slides in Chinese, June 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup24/4.%20ClickHouse万亿数据双中心的设计与实践%20.pdf) | | Percona | Performance analysis | Percona Monitoring and Management | — | — | [Official website, Mar 2020](https://www.percona.com/blog/2020/03/30/advanced-query-analysis-in-percona-monitoring-and-management-with-direct-clickhouse-access/) | +| PingCAP | Analytics | Real-Time Transactional and Analytical Processing | - | - | [GitHub, TiFlash/TiDB](https://github.com/pingcap/tiflash) | | Plausible | Analytics | Main Product | — | — | [Blog post, June 2020](https://twitter.com/PlausibleHQ/status/1273889629087969280) | | PostHog | Product Analytics | Main Product | — | — | [Release Notes, October 2020](https://posthog.com/blog/the-posthog-array-1-15-0), [Blog, November 2021](https://posthog.com/blog/how-we-turned-clickhouse-into-our-eventmansion) | | Postmates | Delivery | — | — | — | [Talk in English, July 2020](https://youtu.be/GMiXCMFDMow?t=188) | @@ -159,6 +161,7 @@ toc_title: Adopters | Suning | E-Commerce | User behaviour analytics | — | — | [Blog article](https://www.sohu.com/a/434152235_411876) | | Superwall | Monetization Tooling | Main product | — | — | [Word of mouth, 
Jan 2022](https://github.com/ClickHouse/ClickHouse/pull/33573) | | Swetrix | Analytics | Main Product | — | — | [Source code](https://github.com/swetrix/swetrix-api) | +| Synpse | Application Management | Main Product | - | - | [Tweet, January 2022](https://twitter.com/KRusenas/status/1483571168363880455) | | Teralytics | Mobility | Analytics | — | — | [Tech blog](https://www.teralytics.net/knowledge-hub/visualizing-mobility-data-the-scalability-challenge) | | Tencent | Big Data | Data processing | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/5.%20ClickHouse大数据集群应用_李俊飞腾讯网媒事业部.pdf) | | Tencent | Messaging | Logging | — | — | [Talk in Chinese, November 2019](https://youtu.be/T-iVQRuw-QY?t=5050) | @@ -172,6 +175,7 @@ toc_title: Adopters | UTMSTAT | Analytics | Main product | — | — | [Blog post, June 2020](https://vc.ru/tribuna/133956-striming-dannyh-iz-servisa-skvoznoy-analitiki-v-clickhouse) | | Vercel | Traffic and Performance Analytics | — | — | — | Direct reference, October 2021 | | VKontakte | Social Network | Statistics, Logging | — | — | [Slides in Russian, August 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup17/3_vk.pdf) | +| VKontech | Distributed Systems | Migrating from MongoDB | - | - | [Blog, January 2022](https://vkontech.com/migrating-your-reporting-queries-from-a-general-purpose-db-mongodb-to-a-data-warehouse-clickhouse-performance-overview/) | | VMware | Cloud | VeloCloud, SDN | — | — | [Product documentation](https://docs.vmware.com/en/vRealize-Operations-Manager/8.3/com.vmware.vcom.metrics.doc/GUID-A9AD72E1-C948-4CA2-971B-919385AB3CA8.html) | | Walmart Labs | Internet, Retail | — | — | — | [Talk in English, July 2020](https://youtu.be/GMiXCMFDMow?t=144) | | Wargaming | Games | | — | — | [Interview](https://habr.com/en/post/496954/) | From 08dfce9a77ab6a025bd44e7a96492656da0553ec Mon Sep 17 00:00:00 2001 From: Dan Roscigno Date: Fri, 8 Apr 2022 18:45:18 -0400 Subject: [PATCH 009/110] Prepared partitions differ from raw data In both the system exposed at `gh-api.clickhouse.com/play` and in the prepared partitions the field `IATA_CODE_Reporting_Airline` has been renamed as `Carrier`, which breaks many of the queries. Note: I assume that the raw data uses `IATA_CODE_Reporting_Airline`, but I do not have disk space to test. --- docs/en/getting-started/example-datasets/ontime.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docs/en/getting-started/example-datasets/ontime.md b/docs/en/getting-started/example-datasets/ontime.md index efc807b75fa..e4ae7237d57 100644 --- a/docs/en/getting-started/example-datasets/ontime.md +++ b/docs/en/getting-started/example-datasets/ontime.md @@ -159,6 +159,10 @@ $ clickhouse-client --query "select count(*) from datasets.ontime" !!! info "Info" If you will run the queries described below, you have to use the full table name, `datasets.ontime`. + +!!! info "Info" + If you are using the prepared partitions or the Online Playground replace any occurrence of `IATA_CODE_Reporting_Airline` or `IATA_CODE_Reporting_Airline AS Carrier` with Carrier (see `describe ontime`). + ## Queries {#queries} Q0. 
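To illustrate the substitution described in the info block added by the patch above — a sketch only, assuming the query shapes from ontime.md and that the raw CSV import keeps the original `IATA_CODE_Reporting_Airline` column name — a query written for the raw data and its counterpart for the prepared partitions or the Online Playground would look like this:

```sql
-- Form used against the raw dataset, where the column keeps its original name:
SELECT IATA_CODE_Reporting_Airline AS Carrier, count(*) AS c
FROM ontime
WHERE DepDelay > 10 AND Year = 2007
GROUP BY Carrier
ORDER BY c DESC;

-- Equivalent form for the prepared partitions / Online Playground,
-- where the column is already named Carrier:
SELECT Carrier, count(*) AS c
FROM ontime
WHERE DepDelay > 10 AND Year = 2007
GROUP BY Carrier
ORDER BY c DESC;
```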
From 51dc21a55cd2ce1901b2e2fd191f78057147f611 Mon Sep 17 00:00:00 2001 From: Dan Roscigno Date: Fri, 8 Apr 2022 18:49:47 -0400 Subject: [PATCH 010/110] add ' --- docs/en/getting-started/example-datasets/ontime.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/getting-started/example-datasets/ontime.md b/docs/en/getting-started/example-datasets/ontime.md index e4ae7237d57..87096354f1f 100644 --- a/docs/en/getting-started/example-datasets/ontime.md +++ b/docs/en/getting-started/example-datasets/ontime.md @@ -161,7 +161,7 @@ $ clickhouse-client --query "select count(*) from datasets.ontime" !!! info "Info" - If you are using the prepared partitions or the Online Playground replace any occurrence of `IATA_CODE_Reporting_Airline` or `IATA_CODE_Reporting_Airline AS Carrier` with Carrier (see `describe ontime`). + If you are using the prepared partitions or the Online Playground replace any occurrence of `IATA_CODE_Reporting_Airline` or `IATA_CODE_Reporting_Airline AS Carrier` in the following queries with `Carrier` (see `describe ontime`). ## Queries {#queries} From 3521292fad5ac69c05704c8c43520ac0c623966e Mon Sep 17 00:00:00 2001 From: jewisliu Date: Sun, 10 Apr 2022 11:54:15 +0800 Subject: [PATCH 011/110] Auto assign numbers for Enum elements --- src/DataTypes/DataTypeEnum.cpp | 20 +++++++++++++++++++ .../0_stateless/00757_enum_defaults.reference | 2 ++ .../0_stateless/00757_enum_defaults.sql | 6 ++++++ 3 files changed, 28 insertions(+) diff --git a/src/DataTypes/DataTypeEnum.cpp b/src/DataTypes/DataTypeEnum.cpp index b659d92e3f4..e292897e451 100644 --- a/src/DataTypes/DataTypeEnum.cpp +++ b/src/DataTypes/DataTypeEnum.cpp @@ -191,6 +191,24 @@ static void checkASTStructure(const ASTPtr & child) ErrorCodes::UNEXPECTED_AST_STRUCTURE); } +static void autoAssignNumberForEnum(const ASTPtr & arguments) +{ + Int16 child_count = 1; + ASTs assign_number_child; + assign_number_child.reserve(arguments->children.size()); + for (const ASTPtr & child : arguments->children) + { + if (child->as()) + { + ASTPtr func = makeASTFunction("equals", child, std::make_shared(child_count++)); + assign_number_child.emplace_back(func); + } + else + assign_number_child.emplace_back(child); + } + arguments->children = assign_number_child; +} + template static DataTypePtr createExact(const ASTPtr & arguments) { @@ -202,6 +220,7 @@ static DataTypePtr createExact(const ASTPtr & arguments) using FieldType = typename DataTypeEnum::FieldType; + autoAssignNumberForEnum(arguments); /// Children must be functions 'equals' with string literal as left argument and numeric literal as right argument. for (const ASTPtr & child : arguments->children) { @@ -236,6 +255,7 @@ static DataTypePtr create(const ASTPtr & arguments) if (!arguments || arguments->children.empty()) throw Exception("Enum data type cannot be empty", ErrorCodes::EMPTY_DATA_PASSED); + autoAssignNumberForEnum(arguments); /// Children must be functions 'equals' with string literal as left argument and numeric literal as right argument. 
for (const ASTPtr & child : arguments->children) { diff --git a/tests/queries/0_stateless/00757_enum_defaults.reference b/tests/queries/0_stateless/00757_enum_defaults.reference index 56ead34ad3b..35ef3c72406 100644 --- a/tests/queries/0_stateless/00757_enum_defaults.reference +++ b/tests/queries/0_stateless/00757_enum_defaults.reference @@ -4,3 +4,5 @@ iphone 1 iphone 1 \N 1 +a +b diff --git a/tests/queries/0_stateless/00757_enum_defaults.sql b/tests/queries/0_stateless/00757_enum_defaults.sql index 58f54a98b70..01f795b31f9 100644 --- a/tests/queries/0_stateless/00757_enum_defaults.sql +++ b/tests/queries/0_stateless/00757_enum_defaults.sql @@ -1,2 +1,8 @@ select os_name, count() from (SELECT CAST('iphone' AS Enum8('iphone' = 1, 'android' = 2)) AS os_name) group by os_name WITH TOTALS; select toNullable(os_name) AS os_name, count() from (SELECT CAST('iphone' AS Enum8('iphone' = 1, 'android' = 2)) AS os_name) group by os_name WITH TOTALS; + +DROP TABLE IF EXISTS auto_assgin_enum; +CREATE TABLE auto_assgin_enum (x enum('a', 'b')) ENGINE=MergeTree() order by x; +INSERT INTO auto_assgin_enum VALUES('a'), ('b'); +select * from auto_assgin_enum; +DROP TABLE auto_assgin_enum; From f0fad98a73b88ffb54e8f93228a2855bed15785e Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 11 Apr 2022 01:03:24 +0200 Subject: [PATCH 012/110] Reset thread name in thread pool --- src/Common/ThreadPool.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/Common/ThreadPool.cpp b/src/Common/ThreadPool.cpp index 8bfb93c9e94..15ae64f17fb 100644 --- a/src/Common/ThreadPool.cpp +++ b/src/Common/ThreadPool.cpp @@ -1,4 +1,5 @@ #include +#include #include #include @@ -243,6 +244,9 @@ void ThreadPoolImpl::worker(typename std::list::iterator thread_ while (true) { + /// This is inside the loop to also reset previous thread names set inside the jobs. 
+ setThreadName("ThreadPoolIdle"); + Job job; bool need_shutdown = false; From 802f62a80e64bd6ba9ab2cbabf9d4fbea508443d Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Mon, 11 Apr 2022 09:42:00 +0200 Subject: [PATCH 013/110] fix --- src/Common/ZooKeeper/ZooKeeperImpl.h | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Common/ZooKeeper/ZooKeeperImpl.h b/src/Common/ZooKeeper/ZooKeeperImpl.h index faa777c33aa..58c5947e8ea 100644 --- a/src/Common/ZooKeeper/ZooKeeperImpl.h +++ b/src/Common/ZooKeeper/ZooKeeperImpl.h @@ -219,7 +219,6 @@ private: ResponseCallback callback; WatchCallback watch; clock::time_point time; - UInt64 thread_id = 0; }; using RequestsQueue = ConcurrentBoundedQueue; From d2ccf876c6e68b78838e2c92078d4727f9e78c9b Mon Sep 17 00:00:00 2001 From: kssenii Date: Mon, 11 Apr 2022 17:51:49 +0200 Subject: [PATCH 014/110] Fix --- src/Common/FileCache.cpp | 169 ++++++++++++------ src/Common/FileCache.h | 7 + src/Common/FileSegment.cpp | 42 ++++- src/Common/FileSegment.h | 3 + src/Disks/IO/CachedReadBufferFromRemoteFS.cpp | 26 ++- src/Disks/IO/CachedReadBufferFromRemoteFS.h | 2 + src/Disks/IO/ReadBufferFromRemoteFSGather.cpp | 4 +- src/IO/ReadBufferFromFileDescriptor.cpp | 2 +- 8 files changed, 191 insertions(+), 64 deletions(-) diff --git a/src/Common/FileCache.cpp b/src/Common/FileCache.cpp index 05d32f5ffe4..2f9eec33022 100644 --- a/src/Common/FileCache.cpp +++ b/src/Common/FileCache.cpp @@ -233,6 +233,88 @@ FileSegments LRUFileCache::splitRangeIntoCells( return file_segments; } +void LRUFileCache::fillHolesWithEmptyFileSegments( + FileSegments & file_segments, const Key & key, const FileSegment::Range & range, bool fill_with_detached_file_segments, std::lock_guard & cache_lock) +{ + /// There are segments [segment1, ..., segmentN] + /// (non-overlapping, non-empty, ascending-ordered) which (maybe partially) + /// intersect with given range. + + /// It can have holes: + /// [____________________] -- requested range + /// [____] [_] [_________] -- intersecting cache [segment1, ..., segmentN] + /// + /// For each such hole create a cell with file segment state EMPTY. 
+ + auto it = file_segments.begin(); + auto segment_range = (*it)->range(); + + size_t current_pos; + if (segment_range.left < range.left) + { + /// [_______ -- requested range + /// [_______ + /// ^ + /// segment1 + + current_pos = segment_range.right + 1; + ++it; + } + else + current_pos = range.left; + + while (current_pos <= range.right && it != file_segments.end()) + { + segment_range = (*it)->range(); + + if (current_pos == segment_range.left) + { + current_pos = segment_range.right + 1; + ++it; + continue; + } + + assert(current_pos < segment_range.left); + + auto hole_size = segment_range.left - current_pos; + + if (fill_with_detached_file_segments) + { + auto file_segment = std::make_shared(current_pos, hole_size, key, this, FileSegment::State::EMPTY); + file_segment->detached = true; + file_segments.insert(it, file_segment); + } + else + { + file_segments.splice(it, splitRangeIntoCells(key, current_pos, hole_size, FileSegment::State::EMPTY, cache_lock)); + } + + current_pos = segment_range.right + 1; + ++it; + } + + if (current_pos <= range.right) + { + /// ________] -- requested range + /// _____] + /// ^ + /// segmentN + + auto hole_size = range.right - current_pos + 1; + + if (fill_with_detached_file_segments) + { + auto file_segment = std::make_shared(current_pos, hole_size, key, this, FileSegment::State::EMPTY); + file_segment->detached = true; + file_segments.insert(file_segments.end(), file_segment); + } + else + { + file_segments.splice(file_segments.end(), splitRangeIntoCells(key, current_pos, hole_size, FileSegment::State::EMPTY, cache_lock)); + } + } +} + FileSegmentsHolder LRUFileCache::getOrSet(const Key & key, size_t offset, size_t size) { assertInitialized(); @@ -254,69 +336,42 @@ FileSegmentsHolder LRUFileCache::getOrSet(const Key & key, size_t offset, size_t } else { - /// There are segments [segment1, ..., segmentN] - /// (non-overlapping, non-empty, ascending-ordered) which (maybe partially) - /// intersect with given range. - - /// It can have holes: - /// [____________________] -- requested range - /// [____] [_] [_________] -- intersecting cache [segment1, ..., segmentN] - /// - /// For each such hole create a cell with file segment state EMPTY. 
- - auto it = file_segments.begin(); - auto segment_range = (*it)->range(); - - size_t current_pos; - if (segment_range.left < range.left) - { - /// [_______ -- requested range - /// [_______ - /// ^ - /// segment1 - - current_pos = segment_range.right + 1; - ++it; - } - else - current_pos = range.left; - - while (current_pos <= range.right && it != file_segments.end()) - { - segment_range = (*it)->range(); - - if (current_pos == segment_range.left) - { - current_pos = segment_range.right + 1; - ++it; - continue; - } - - assert(current_pos < segment_range.left); - - auto hole_size = segment_range.left - current_pos; - file_segments.splice(it, splitRangeIntoCells(key, current_pos, hole_size, FileSegment::State::EMPTY, cache_lock)); - - current_pos = segment_range.right + 1; - ++it; - } - - if (current_pos <= range.right) - { - /// ________] -- requested range - /// _____] - /// ^ - /// segmentN - - auto hole_size = range.right - current_pos + 1; - file_segments.splice(file_segments.end(), splitRangeIntoCells(key, current_pos, hole_size, FileSegment::State::EMPTY, cache_lock)); - } + fillHolesWithEmptyFileSegments(file_segments, key, range, false, cache_lock); } assert(!file_segments.empty()); return FileSegmentsHolder(std::move(file_segments)); } +FileSegmentsHolder LRUFileCache::get(const Key & key, size_t offset, size_t size) +{ + assertInitialized(); + + FileSegment::Range range(offset, offset + size - 1); + + std::lock_guard cache_lock(mutex); + +#ifndef NDEBUG + assertCacheCorrectness(key, cache_lock); +#endif + + /// Get all segments which intersect with the given range. + auto file_segments = getImpl(key, range, cache_lock); + + if (file_segments.empty()) + { + auto file_segment = std::make_shared(offset, size, key, this, FileSegment::State::EMPTY); + file_segment->detached = true; + file_segments = { file_segment }; + } + else + { + fillHolesWithEmptyFileSegments(file_segments, key, range, true, cache_lock); + } + + return FileSegmentsHolder(std::move(file_segments)); +} + LRUFileCache::FileSegmentCell * LRUFileCache::addCell( const Key & key, size_t offset, size_t size, FileSegment::State state, std::lock_guard & cache_lock) diff --git a/src/Common/FileCache.h b/src/Common/FileCache.h index e706376bc89..b1d46569949 100644 --- a/src/Common/FileCache.h +++ b/src/Common/FileCache.h @@ -72,6 +72,8 @@ public: */ virtual FileSegmentsHolder getOrSet(const Key & key, size_t offset, size_t size) = 0; + virtual FileSegmentsHolder get(const Key & key, size_t offset, size_t size) = 0; + virtual FileSegmentsHolder setDownloading(const Key & key, size_t offset, size_t size) = 0; virtual FileSegments getSnapshot() const = 0; @@ -124,6 +126,8 @@ public: FileSegmentsHolder getOrSet(const Key & key, size_t offset, size_t size) override; + FileSegmentsHolder get(const Key & key, size_t offset, size_t size) override; + FileSegments getSnapshot() const override; FileSegmentsHolder setDownloading(const Key & key, size_t offset, size_t size) override; @@ -213,6 +217,9 @@ private: String dumpStructureImpl(const Key & key_, std::lock_guard & cache_lock); + void fillHolesWithEmptyFileSegments( + FileSegments & file_segments, const Key & key, const FileSegment::Range & range, bool fill_with_detached_file_segments, std::lock_guard & cache_lock); + public: struct Stat { diff --git a/src/Common/FileSegment.cpp b/src/Common/FileSegment.cpp index 5a13ea7d207..42490146b37 100644 --- a/src/Common/FileSegment.cpp +++ b/src/Common/FileSegment.cpp @@ -107,6 +107,9 @@ String FileSegment::getOrSetDownloader() { 
std::lock_guard segment_lock(mutex); + if (detached) + throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR, "Cannot set downloader for a detached file segment"); + if (downloader_id.empty()) { assert(download_state != State::DOWNLOADING); @@ -216,6 +219,8 @@ void FileSegment::write(const char * from, size_t size, size_t offset_) "Attempt to write {} bytes to offset: {}, but current download offset is {}", size, offset_, download_offset); + assertNotDetached(); + if (!cache_writer) { if (downloaded_size > 0) @@ -263,6 +268,8 @@ void FileSegment::writeInMemory(const char * from, size_t size) ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR, "Not enough space is reserved. Available: {}, expected: {}", availableSize(), size); + assertNotDetached(); + std::lock_guard segment_lock(mutex); if (cache_writer) @@ -297,7 +304,9 @@ size_t FileSegment::finalizeWrite() size_t size = cache_writer->offset(); if (size == 0) - throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR, "Writing size is not allowed"); + throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR, "Writing zero size is not allowed"); + + assertNotDetached(); try { @@ -352,6 +361,8 @@ bool FileSegment::reserve(size_t size) if (!size) throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR, "Zero space reservation is not allowed"); + assertNotDetached(); + { std::lock_guard segment_lock(mutex); @@ -419,7 +430,10 @@ void FileSegment::completeBatchAndResetDownloader() if (!is_downloader) { cv.notify_all(); - throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR, "File segment can be completed only by downloader"); + throw Exception( + ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR, + "File segment can be completed only by downloader ({} != {})", + downloader_id, getCallerId()); } resetDownloaderImpl(segment_lock); @@ -453,6 +467,8 @@ void FileSegment::complete(State state) download_state = state; + assertNotDetached(); + try { completeImpl(cache_lock, segment_lock); @@ -479,6 +495,8 @@ void FileSegment::complete(std::lock_guard & cache_lock) if (download_state != State::DOWNLOADED && getDownloadedSize(segment_lock) == range().size()) setDownloaded(segment_lock); + assertNotDetached(); + if (download_state == State::DOWNLOADING || download_state == State::EMPTY) { /// Segment state can be changed from DOWNLOADING or EMPTY only if the caller is the @@ -608,6 +626,12 @@ void FileSegment::assertCorrectnessImpl(std::lock_guard & /* segment assert(download_state != FileSegment::State::DOWNLOADED || std::filesystem::file_size(cache->getPathInLocalCache(key(), offset())) > 0); } +void FileSegment::assertNotDetached() const +{ + if (detached) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Operation not allowed, file segment is detached"); +} + FileSegmentPtr FileSegment::getSnapshot(const FileSegmentPtr & file_segment, std::lock_guard & /* cache_lock */) { auto snapshot = std::make_shared( @@ -638,6 +662,20 @@ FileSegmentsHolder::~FileSegmentsHolder() auto current_file_segment_it = file_segment_it; auto & file_segment = *current_file_segment_it; + if (file_segment->detached) + { +#ifndef NDEBUG + { + std::lock_guard cache_lock(file_segment->cache->mutex); + std::lock_guard segment_lock(file_segment->mutex); + bool is_last_holder = cache->isLastFileSegmentHolder(file_segment->key(), file_segment->offset(), cache_lock, segment_lock); + assert(is_last_holder); + } +#endif + file_segment_it = file_segments.erase(current_file_segment_it); + continue; + } + if (!cache) cache = file_segment->cache; diff --git a/src/Common/FileSegment.h 
b/src/Common/FileSegment.h index 615fd9a56de..8439389fdeb 100644 --- a/src/Common/FileSegment.h +++ b/src/Common/FileSegment.h @@ -150,6 +150,7 @@ private: size_t getDownloadedSize(std::lock_guard & segment_lock) const; String getInfoForLogImpl(std::lock_guard & segment_lock) const; void assertCorrectnessImpl(std::lock_guard & segment_lock) const; + void assertNotDetached() const; void setDownloaded(std::lock_guard & segment_lock); void setDownloadFailed(std::lock_guard & segment_lock); @@ -199,6 +200,8 @@ private: Poco::Logger * log; + /// "detached" file segment means that it is not owned by cache ("detached" from cache). + /// In general case, all file segments are owned by cache. bool detached = false; std::atomic is_downloaded{false}; diff --git a/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp b/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp index 16c1dd54f9f..d39d54970dc 100644 --- a/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp +++ b/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp @@ -46,7 +46,15 @@ CachedReadBufferFromRemoteFS::CachedReadBufferFromRemoteFS( void CachedReadBufferFromRemoteFS::initialize(size_t offset, size_t size) { - file_segments_holder.emplace(cache->getOrSet(cache_key, offset, size)); + + if (settings.read_from_filesystem_cache_if_exists_otherwise_bypass_cache) + { + file_segments_holder.emplace(cache->get(cache_key, offset, size)); + } + else + { + file_segments_holder.emplace(cache->getOrSet(cache_key, offset, size)); + } /** * Segments in returned list are ordered in ascending order and represent a full contiguous @@ -326,6 +334,10 @@ SeekableReadBufferPtr CachedReadBufferFromRemoteFS::getImplementationBuffer(File #endif size_t seek_offset = file_offset_of_buffer_end - range.left; + + if (file_offset_of_buffer_end < range.left) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected {} > {}", file_offset_of_buffer_end, range.left); + read_buffer_for_file_segment->seek(seek_offset, SEEK_SET); break; @@ -577,6 +589,8 @@ bool CachedReadBufferFromRemoteFS::nextImplStep() { last_caller_id = FileSegment::getCallerId(); + assertCorrectness(); + if (!initialized) initialize(file_offset_of_buffer_end, getTotalSizeToRead()); @@ -597,8 +611,8 @@ bool CachedReadBufferFromRemoteFS::nextImplStep() { try { - bool file_segment_already_completed = !file_segment->isDownloader(); - if (!file_segment_already_completed) + bool need_complete_file_segment = file_segment->isDownloader(); + if (need_complete_file_segment) file_segment->completeBatchAndResetDownloader(); } catch (...) 
@@ -820,6 +834,12 @@ std::optional CachedReadBufferFromRemoteFS::getLastNonDownloadedOffset() return std::nullopt; } +void CachedReadBufferFromRemoteFS::assertCorrectness() const +{ + if (IFileCache::isReadOnly() && !settings.read_from_filesystem_cache_if_exists_otherwise_bypass_cache) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cache usage is not allowed"); +} + String CachedReadBufferFromRemoteFS::getInfoForLog() { String implementation_buffer_read_range_str; diff --git a/src/Disks/IO/CachedReadBufferFromRemoteFS.h b/src/Disks/IO/CachedReadBufferFromRemoteFS.h index 5fc9ec39246..1e5b0a4a2d4 100644 --- a/src/Disks/IO/CachedReadBufferFromRemoteFS.h +++ b/src/Disks/IO/CachedReadBufferFromRemoteFS.h @@ -50,6 +50,8 @@ private: bool nextImplStep(); + void assertCorrectness() const; + enum class ReadType { CACHED, diff --git a/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp b/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp index 16a57b83771..e4690558b9c 100644 --- a/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp +++ b/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp @@ -38,7 +38,9 @@ SeekableReadBufferPtr ReadBufferFromS3Gather::createImplementationBuffer(const S current_path = path; auto cache = settings.remote_fs_cache; - bool with_cache = cache && settings.enable_filesystem_cache; + bool with_cache = cache + && settings.enable_filesystem_cache + && (!IFileCache::isReadOnly() || settings.read_from_filesystem_cache_if_exists_otherwise_bypass_cache); auto remote_file_reader_creator = [=, this]() { diff --git a/src/IO/ReadBufferFromFileDescriptor.cpp b/src/IO/ReadBufferFromFileDescriptor.cpp index d266fb86e0f..f7006666b13 100644 --- a/src/IO/ReadBufferFromFileDescriptor.cpp +++ b/src/IO/ReadBufferFromFileDescriptor.cpp @@ -191,7 +191,7 @@ off_t ReadBufferFromFileDescriptor::seek(off_t offset, int whence) off_t res = ::lseek(fd, seek_pos, SEEK_SET); if (-1 == res) - throwFromErrnoWithPath("Cannot seek through file " + getFileName(), getFileName(), + throwFromErrnoWithPath(fmt::format("Cannot seek through file {} at offset {}", getFileName(), seek_pos), getFileName(), ErrorCodes::CANNOT_SEEK_THROUGH_FILE); /// Also note that seeking past the file size is not allowed. From dc314d53b3b56a0bd6a875e7b0b7ae1cd02c2bdc Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Mon, 11 Apr 2022 20:28:11 -0400 Subject: [PATCH 015/110] do not create view before check, use MatchColumnsMode::Name mode --- src/Interpreters/InterpreterCreateQuery.cpp | 50 +++++++++++---------- 1 file changed, 27 insertions(+), 23 deletions(-) diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 02d4dfd5ea9..d8bbf5fcca6 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -1071,6 +1071,33 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create) /// Set and retrieve list of columns, indices and constraints. Set table engine if needed. Rewrite query in canonical way. 
TableProperties properties = getTablePropertiesAndNormalizeCreateQuery(create); + /// Check type compatible for materialized dest table and select columns + if (create.select && create.is_materialized_view && create.to_table_id) + { + StoragePtr to_table = DatabaseCatalog::instance().getTable({create.to_table_id.database_name, + create.to_table_id.table_name, + create.to_table_id.uuid}, + getContext()); + const auto & to_output_columns = to_table->getInMemoryMetadataPtr()->getSampleBlock(); + + ColumnsWithTypeAndName view_output_columns; + for (const auto & [name, type] : properties.columns.getAllPhysical()) + view_output_columns.emplace_back(type, name); + + Block input_columns = InterpreterSelectWithUnionQuery( + create.select->clone(), getContext(), SelectQueryOptions().analyze()).getSampleBlock(); + + ActionsDAG::makeConvertingActions( + input_columns.getColumnsWithTypeAndName(), + view_output_columns, + ActionsDAG::MatchColumnsMode::Name); + + ActionsDAG::makeConvertingActions( + view_output_columns, + to_output_columns.getColumnsWithTypeAndName(), + ActionsDAG::MatchColumnsMode::Name); + } + DatabasePtr database; bool need_add_to_database = !create.temporary; if (need_add_to_database) @@ -1098,29 +1125,6 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create) if (!created) /// Table already exists return {}; - /// Check type compatible for materialized dest table and select columns - if (create.select && create.is_materialized_view && create.to_table_id) - { - StoragePtr to_table = DatabaseCatalog::instance().getTable({create.to_table_id.database_name, - create.to_table_id.table_name, - create.to_table_id.uuid}, - getContext()); - const auto & to_output_columns = to_table->getInMemoryMetadataPtr()->getSampleBlock(); - StoragePtr view_table = DatabaseCatalog::instance().getTable({create.database, create.table, create.uuid}, getContext()); - const auto & view_output_columns = view_table->getInMemoryMetadataPtr()->getSampleBlock(); - - Block input_columns = InterpreterSelectWithUnionQuery( - create.select->clone(), getContext(), SelectQueryOptions().analyze()).getSampleBlock(); - ActionsDAG::makeConvertingActions( - input_columns.getColumnsWithTypeAndName(), - to_output_columns.getColumnsWithTypeAndName(), - ActionsDAG::MatchColumnsMode::Position); - ActionsDAG::makeConvertingActions( - input_columns.getColumnsWithTypeAndName(), - view_output_columns.getColumnsWithTypeAndName(), - ActionsDAG::MatchColumnsMode::Name); - } - /// If table has dependencies - add them to the graph QualifiedTableName qualified_name{database_name, create.getTable()}; TableNamesSet loading_dependencies = getDependenciesSetFromCreateQuery(getContext()->getGlobalContext(), qualified_name, query_ptr); From e5494de63cdfd975a08f00469940193daf676f68 Mon Sep 17 00:00:00 2001 From: bharatnc Date: Mon, 11 Apr 2022 21:07:45 -0700 Subject: [PATCH 016/110] h3Res0Indexes - remove unused array --- src/Functions/h3GetRes0Indexes.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/Functions/h3GetRes0Indexes.cpp b/src/Functions/h3GetRes0Indexes.cpp index 7347b0fcf7f..9be55c0bb62 100644 --- a/src/Functions/h3GetRes0Indexes.cpp +++ b/src/Functions/h3GetRes0Indexes.cpp @@ -50,11 +50,8 @@ public: res0_indexes.resize(cell_count); getRes0Cells(res0_indexes.data()); - auto res = ColumnArray::create(ColumnUInt64::create()); - Array res_indexes; res_indexes.insert(res_indexes.end(), res0_indexes.begin(), res0_indexes.end()); - res->insert(res_indexes); return result_type->createColumnConst(input_rows_count, 
res_indexes); } From a77a228ab824e0c369aea03e999354169c91d241 Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Tue, 12 Apr 2022 09:37:03 +0200 Subject: [PATCH 017/110] Update FileSegment.cpp --- src/Common/FileSegment.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Common/FileSegment.cpp b/src/Common/FileSegment.cpp index 42490146b37..2d0e55de518 100644 --- a/src/Common/FileSegment.cpp +++ b/src/Common/FileSegment.cpp @@ -662,11 +662,14 @@ FileSegmentsHolder::~FileSegmentsHolder() auto current_file_segment_it = file_segment_it; auto & file_segment = *current_file_segment_it; + if (!cache) + cache = file_segment->cache; + if (file_segment->detached) { #ifndef NDEBUG { - std::lock_guard cache_lock(file_segment->cache->mutex); + std::lock_guard cache_lock(cache->mutex); std::lock_guard segment_lock(file_segment->mutex); bool is_last_holder = cache->isLastFileSegmentHolder(file_segment->key(), file_segment->offset(), cache_lock, segment_lock); assert(is_last_holder); @@ -676,9 +679,6 @@ FileSegmentsHolder::~FileSegmentsHolder() continue; } - if (!cache) - cache = file_segment->cache; - try { /// File segment pointer must be reset right after calling complete() and From 50b5684d004f311ad8f2917b1467da644cc600e0 Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Tue, 12 Apr 2022 11:59:58 +0200 Subject: [PATCH 018/110] Update FileSegment.cpp --- src/Common/FileSegment.cpp | 8 -------- 1 file changed, 8 deletions(-) diff --git a/src/Common/FileSegment.cpp b/src/Common/FileSegment.cpp index 2d0e55de518..92a478f3476 100644 --- a/src/Common/FileSegment.cpp +++ b/src/Common/FileSegment.cpp @@ -667,14 +667,6 @@ FileSegmentsHolder::~FileSegmentsHolder() if (file_segment->detached) { -#ifndef NDEBUG - { - std::lock_guard cache_lock(cache->mutex); - std::lock_guard segment_lock(file_segment->mutex); - bool is_last_holder = cache->isLastFileSegmentHolder(file_segment->key(), file_segment->offset(), cache_lock, segment_lock); - assert(is_last_holder); - } -#endif file_segment_it = file_segments.erase(current_file_segment_it); continue; } From b2c10611d15293e020526af7bd9c637686c7982c Mon Sep 17 00:00:00 2001 From: jewisliu Date: Mon, 11 Apr 2022 18:18:49 +0800 Subject: [PATCH 019/110] support UNSIGNED modifier with unused parameters of INT --- src/Parsers/ParserDataType.cpp | 15 +++++++++++++++ .../00841_temporary_table_database.reference | 4 ++++ .../00841_temporary_table_database.sql | 10 ++++++++++ 3 files changed, 29 insertions(+) diff --git a/src/Parsers/ParserDataType.cpp b/src/Parsers/ParserDataType.cpp index 63a73ff4d1c..b0db5430726 100644 --- a/src/Parsers/ParserDataType.cpp +++ b/src/Parsers/ParserDataType.cpp @@ -97,6 +97,21 @@ bool ParserDataType::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) type_name_suffix = "SIGNED"; else if (ParserKeyword("UNSIGNED").ignore(pos)) type_name_suffix = "UNSIGNED"; + else if (pos->type == TokenType::OpeningRoundBracket) + { + ++pos; + if (pos->type != TokenType::Number) + return false; + ++pos; + if (pos->type != TokenType::ClosingRoundBracket) + return false; + ++pos; + if (ParserKeyword("SIGNED").ignore(pos)) + type_name_suffix = "SIGNED"; + else if (ParserKeyword("UNSIGNED").ignore(pos)) + type_name_suffix = "UNSIGNED"; + } + } if (!type_name_suffix.empty()) diff --git a/tests/queries/0_stateless/00841_temporary_table_database.reference b/tests/queries/0_stateless/00841_temporary_table_database.reference index 
d00491fd7e5..f22f30a4b2a 100644 --- a/tests/queries/0_stateless/00841_temporary_table_database.reference +++ b/tests/queries/0_stateless/00841_temporary_table_database.reference @@ -1 +1,5 @@ 1 +CREATE TEMPORARY TABLE t3_00841\n(\n `x` UInt32\n)\nENGINE = Memory +1 +CREATE TEMPORARY TABLE t4_00841\n(\n `x` Int32\n)\nENGINE = Memory +1 diff --git a/tests/queries/0_stateless/00841_temporary_table_database.sql b/tests/queries/0_stateless/00841_temporary_table_database.sql index a5927a4cd33..96faa0e779b 100644 --- a/tests/queries/0_stateless/00841_temporary_table_database.sql +++ b/tests/queries/0_stateless/00841_temporary_table_database.sql @@ -3,3 +3,13 @@ INSERT INTO t1_00841 VALUES (1); SELECT * FROM t1_00841; CREATE TEMPORARY TABLE test.t2_00841 (x UInt8); -- { serverError 442 } + +CREATE TEMPORARY TABLE t3_00841 (x INT(11) UNSIGNED); +SHOW CREATE TEMPORARY TABLE t3_00841; +INSERT INTO t3_00841 VALUES (1); +SELECT * FROM t3_00841; + +CREATE TEMPORARY TABLE t4_00841 (x INT(11) SIGNED); +SHOW CREATE TEMPORARY TABLE t4_00841; +INSERT INTO t4_00841 VALUES (1); +SELECT * FROM t4_00841; From 457a9e9691f8330e334355244deebfa4a60c9295 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Tue, 12 Apr 2022 14:14:26 +0200 Subject: [PATCH 020/110] fixes for ReplicatedMergeTree --- src/Storages/MergeTree/DropPartsRanges.cpp | 6 +++- src/Storages/MergeTree/DropPartsRanges.h | 2 +- src/Storages/MergeTree/MergeTreeData.cpp | 24 +++++++-------- src/Storages/MergeTree/MergeTreeData.h | 1 + src/Storages/MergeTree/MergeTreePartInfo.h | 2 +- .../MergeTree/ReplicatedMergeTreeQueue.cpp | 17 +++++++---- .../MergeTree/ReplicatedMergeTreeQueue.h | 3 ++ src/Storages/StorageReplicatedMergeTree.cpp | 30 ++++++++++++------- src/Storages/StorageReplicatedMergeTree.h | 5 +--- 9 files changed, 52 insertions(+), 38 deletions(-) diff --git a/src/Storages/MergeTree/DropPartsRanges.cpp b/src/Storages/MergeTree/DropPartsRanges.cpp index a8dfd807d77..d467a7cac3d 100644 --- a/src/Storages/MergeTree/DropPartsRanges.cpp +++ b/src/Storages/MergeTree/DropPartsRanges.cpp @@ -51,12 +51,16 @@ void DropPartsRanges::removeDropRange(const ReplicatedMergeTreeLogEntryPtr & ent drop_ranges.erase(it); } -bool DropPartsRanges::hasDropRange(const MergeTreePartInfo & new_drop_range_info) const +bool DropPartsRanges::hasDropRange(const MergeTreePartInfo & new_drop_range_info, MergeTreePartInfo * out_drop_range_info) const { for (const auto & [_, drop_range] : drop_ranges) { if (drop_range.contains(new_drop_range_info)) + { + if (out_drop_range_info) + *out_drop_range_info = drop_range; return true; + } } return false; diff --git a/src/Storages/MergeTree/DropPartsRanges.h b/src/Storages/MergeTree/DropPartsRanges.h index 4d512263058..8cf25a406ee 100644 --- a/src/Storages/MergeTree/DropPartsRanges.h +++ b/src/Storages/MergeTree/DropPartsRanges.h @@ -30,7 +30,7 @@ public: bool isAffectedByDropRange(const std::string & new_part_name, std::string & postpone_reason) const; /// Already has equal DROP_RANGE. 
Don't need to assign new one - bool hasDropRange(const MergeTreePartInfo & new_drop_range_info) const; + bool hasDropRange(const MergeTreePartInfo & new_drop_range_info, MergeTreePartInfo * out_drop_range_info = nullptr) const; /// Add DROP_RANGE to map void addDropRange(const ReplicatedMergeTreeLogEntryPtr & entry); diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index a541822d6c1..e1e698d41e3 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -2886,7 +2886,7 @@ MergeTreeData::DataPartsVector MergeTreeData::removePartsInRangeFromWorkingSet( if (drop_range.min_block > drop_range.max_block) throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid drop range: {}", drop_range.getPartName()); - auto partition_range = getDataPartsPartitionRange(drop_range.partition_id); + auto partition_range = getVisibleDataPartsVectorInPartition(txn, drop_range.partition_id, &lock); for (const DataPartPtr & part : partition_range) { @@ -2944,19 +2944,10 @@ MergeTreeData::DataPartsVector MergeTreeData::removePartsInRangeFromWorkingSet( part->name, drop_range.getPartName()); } - if (part->getState() == DataPartState::Deleting) - continue; - - /// FIXME refactor removePartsFromWorkingSet(...), do not remove parts twice - if (txn) - { - if (!part->version.isVisible(*txn)) - continue; - } - parts_to_remove.emplace_back(part); } + /// FIXME refactor removePartsFromWorkingSet(...), do not remove parts twice removePartsFromWorkingSet(txn, parts_to_remove, clear_without_timeout, lock); return parts_to_remove; @@ -3388,13 +3379,18 @@ MergeTreeData::DataPartPtr MergeTreeData::getActiveContainingPart(const String & MergeTreeData::DataPartsVector MergeTreeData::getVisibleDataPartsVectorInPartition(ContextPtr local_context, const String & partition_id) const { - if (const auto * txn = local_context->getCurrentTransaction().get()) + return getVisibleDataPartsVectorInPartition(local_context->getCurrentTransaction().get(), partition_id); +} + +MergeTreeData::DataPartsVector MergeTreeData::getVisibleDataPartsVectorInPartition(MergeTreeTransaction * txn, const String & partition_id, DataPartsLock * acquired_lock) const +{ + if (txn) { DataPartStateAndPartitionID active_parts{MergeTreeDataPartState::Active, partition_id}; DataPartStateAndPartitionID outdated_parts{MergeTreeDataPartState::Outdated, partition_id}; DataPartsVector res; { - auto lock = lockParts(); + auto lock = (acquired_lock) ? DataPartsLock() : lockParts(); res.insert(res.end(), data_parts_by_state_and_info.lower_bound(active_parts), data_parts_by_state_and_info.upper_bound(active_parts)); res.insert(res.end(), data_parts_by_state_and_info.lower_bound(outdated_parts), data_parts_by_state_and_info.upper_bound(outdated_parts)); } @@ -3404,7 +3400,7 @@ MergeTreeData::DataPartsVector MergeTreeData::getVisibleDataPartsVectorInPartiti DataPartStateAndPartitionID state_with_partition{MergeTreeDataPartState::Active, partition_id}; - auto lock = lockParts(); + auto lock = (acquired_lock) ? 
DataPartsLock() : lockParts(); return DataPartsVector( data_parts_by_state_and_info.lower_bound(state_with_partition), data_parts_by_state_and_info.upper_bound(state_with_partition)); diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index 5ce5f30f0dc..3a099c471c2 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -490,6 +490,7 @@ public: void swapActivePart(MergeTreeData::DataPartPtr part_copy); /// Returns all parts in specified partition + DataPartsVector getVisibleDataPartsVectorInPartition(MergeTreeTransaction * txn, const String & partition_id, DataPartsLock * acquired_lock = nullptr) const; DataPartsVector getVisibleDataPartsVectorInPartition(ContextPtr local_context, const String & partition_id) const; DataPartsVector getVisibleDataPartsVectorInPartitions(ContextPtr local_context, const std::unordered_set & partition_ids) const; diff --git a/src/Storages/MergeTree/MergeTreePartInfo.h b/src/Storages/MergeTree/MergeTreePartInfo.h index 065325920e2..e9ff6f87f0b 100644 --- a/src/Storages/MergeTree/MergeTreePartInfo.h +++ b/src/Storages/MergeTree/MergeTreePartInfo.h @@ -75,7 +75,7 @@ struct MergeTreePartInfo /// Return part mutation version, if part wasn't mutated return zero Int64 getMutationVersion() const { - return mutation ? mutation : 0; + return mutation; } /// True if parts do not intersect in any way. diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp index c5798aaefe5..78ced7df822 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp @@ -94,6 +94,12 @@ bool ReplicatedMergeTreeQueue::isVirtualPart(const MergeTreeData::DataPartPtr & return !virtual_part_name.empty() && virtual_part_name != data_part->name; } +bool ReplicatedMergeTreeQueue::hasDropRange(const MergeTreePartInfo & part_info, MergeTreePartInfo * out_drop_range_info) const +{ + std::lock_guard lock(state_mutex); + return drop_ranges.hasDropRange(part_info, out_drop_range_info); +} + bool ReplicatedMergeTreeQueue::checkPartInQueueAndGetSourceParts(const String & part_name, Strings & source_parts) const { std::lock_guard lock(state_mutex); @@ -2141,21 +2147,21 @@ bool ReplicatedMergeTreeMergePredicate::canMergeSinglePart( if (pinned_part_uuids.part_uuids.contains(part->uuid)) { if (out_reason) - *out_reason = "Part " + part->name + " has uuid " + toString(part->uuid) + " which is currently pinned"; + *out_reason = fmt::format("Part {} has uuid {} which is currently pinned", part->name, part->uuid); return false; } if (part->name == inprogress_quorum_part) { if (out_reason) - *out_reason = "Quorum insert for part " + part->name + " is currently in progress"; + *out_reason = fmt::format("Quorum insert for part {} is currently in progress", part->name); return false; } if (prev_virtual_parts.getContainingPart(part->info).empty()) { if (out_reason) - *out_reason = "Entry for part " + part->name + " hasn't been read from the replication log yet"; + *out_reason = fmt::format("Entry for part {} hasn't been read from the replication log yet", part->name); return false; } @@ -2167,7 +2173,7 @@ bool ReplicatedMergeTreeMergePredicate::canMergeSinglePart( if (containing_part != part->name) { if (out_reason) - *out_reason = "Part " + part->name + " has already been assigned a merge into " + containing_part; + *out_reason = fmt::format("Part {} has already been assigned a merge into {}", part->name, 
containing_part); return false; } @@ -2262,8 +2268,7 @@ bool ReplicatedMergeTreeMergePredicate::isMutationFinished(const ReplicatedMerge bool ReplicatedMergeTreeMergePredicate::hasDropRange(const MergeTreePartInfo & new_drop_range_info) const { - std::lock_guard lock(queue.state_mutex); - return queue.drop_ranges.hasDropRange(new_drop_range_info); + return queue.hasDropRange(new_drop_range_info); } diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h index ae0ca806344..0c0e872b0ac 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h @@ -400,6 +400,9 @@ public: /// Checks that part is already in virtual parts bool isVirtualPart(const MergeTreeData::DataPartPtr & data_part) const; + /// Returns true if part_info is covered by some DROP_RANGE + bool hasDropRange(const MergeTreePartInfo & part_info, MergeTreePartInfo * out_drop_range_info = nullptr) const; + /// Check that part produced by some entry in queue and get source parts for it. /// If there are several entries return largest source_parts set. This rarely possible /// for example after replica clone. diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 66a5baf555b..0e61d8b6f81 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -1812,7 +1812,7 @@ bool StorageReplicatedMergeTree::executeFetch(LogEntry & entry, bool need_to_che if (!parts_for_merge.empty() && replica.empty()) { - LOG_INFO(log, "No active replica has part {}. Will fetch merged part instead.", entry.new_part_name); + LOG_INFO(log, " {}. Will fetch merged part instead.", entry.new_part_name); /// We should enqueue it for check, because merged part may never appear if source part is lost enqueuePartForCheck(entry.new_part_name); return false; @@ -6984,42 +6984,39 @@ bool StorageReplicatedMergeTree::dropPartImpl( if (!part) { if (throw_if_noop) - throw Exception("Part " + part_name + " not found locally, won't try to drop it.", ErrorCodes::NO_SUCH_DATA_PART); + throw Exception(ErrorCodes::NO_SUCH_DATA_PART, "Part {} not found locally, won't try to drop it.", part_name); return false; } if (merge_pred.hasDropRange(part->info)) { if (throw_if_noop) - throw Exception("Already has DROP RANGE for part " + part_name + " in queue.", ErrorCodes::PART_IS_TEMPORARILY_LOCKED); + throw Exception(ErrorCodes::PART_IS_TEMPORARILY_LOCKED, "Already has DROP RANGE for part {} in queue.", part_name); return false; } /// There isn't a lot we can do otherwise. Can't cancel merges because it is possible that a replica already /// finished the merge. - if (partIsAssignedToBackgroundOperation(part)) + String out_reason; + if (!merge_pred.canMergeSinglePart(part, &out_reason)) { if (throw_if_noop) - throw Exception("Part " + part_name - + " is currently participating in a background operation (mutation/merge)" - + ", try again later", ErrorCodes::PART_IS_TEMPORARILY_LOCKED); + throw Exception(ErrorCodes::PART_IS_TEMPORARILY_LOCKED, out_reason); return false; } if (partIsLastQuorumPart(part->info)) { if (throw_if_noop) - throw Exception("Part " + part_name + " is last inserted part with quorum in partition. Cannot drop", - ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Part {} is last inserted part with quorum in partition. 
Cannot drop", part_name); return false; } if (partIsInsertingWithParallelQuorum(part->info)) { if (throw_if_noop) - throw Exception("Part " + part_name + " is inserting with parallel quorum. Cannot drop", - ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Part {} is inserting with parallel quorum. Cannot drop", part_name); return false; } @@ -7146,6 +7143,17 @@ bool StorageReplicatedMergeTree::dropAllPartsInPartition( "Cannot assign ALTER PARTITION because another ALTER PARTITION query was concurrently executed"); } +void StorageReplicatedMergeTree::enqueuePartForCheck(const String & part_name, time_t delay_to_check_seconds) +{ + MergeTreePartInfo covering_drop_range; + if (queue.hasDropRange(MergeTreePartInfo::fromPartName(part_name, format_version), &covering_drop_range)) + { + LOG_WARNING(log, "Do not enqueue part {} for check because it's covered by DROP_RANGE {} and going to be removed", + part_name, covering_drop_range.getPartName()); + return; + } + part_check_thread.enqueuePart(part_name, delay_to_check_seconds); +} CheckResults StorageReplicatedMergeTree::checkData(const ASTPtr & query, ContextPtr local_context) { diff --git a/src/Storages/StorageReplicatedMergeTree.h b/src/Storages/StorageReplicatedMergeTree.h index 317544c8bb8..59fb3f124c7 100644 --- a/src/Storages/StorageReplicatedMergeTree.h +++ b/src/Storages/StorageReplicatedMergeTree.h @@ -200,10 +200,7 @@ public: void getReplicaDelays(time_t & out_absolute_delay, time_t & out_relative_delay); /// Add a part to the queue of parts whose data you want to check in the background thread. - void enqueuePartForCheck(const String & part_name, time_t delay_to_check_seconds = 0) - { - part_check_thread.enqueuePart(part_name, delay_to_check_seconds); - } + void enqueuePartForCheck(const String & part_name, time_t delay_to_check_seconds = 0); CheckResults checkData(const ASTPtr & query, ContextPtr context) override; From 25101de7e2cd081e2a318cb5c7a46e08048ef731 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Tue, 12 Apr 2022 09:29:32 -0400 Subject: [PATCH 021/110] check only selected columns against destination table --- src/Interpreters/InterpreterCreateQuery.cpp | 33 ++++++++++----------- 1 file changed, 15 insertions(+), 18 deletions(-) diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index d8bbf5fcca6..9a7f618696a 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -1074,28 +1074,25 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create) /// Check type compatible for materialized dest table and select columns if (create.select && create.is_materialized_view && create.to_table_id) { - StoragePtr to_table = DatabaseCatalog::instance().getTable({create.to_table_id.database_name, - create.to_table_id.table_name, - create.to_table_id.uuid}, - getContext()); - const auto & to_output_columns = to_table->getInMemoryMetadataPtr()->getSampleBlock(); - - ColumnsWithTypeAndName view_output_columns; - for (const auto & [name, type] : properties.columns.getAllPhysical()) - view_output_columns.emplace_back(type, name); - - Block input_columns = InterpreterSelectWithUnionQuery( + Block input_block = InterpreterSelectWithUnionQuery( create.select->clone(), getContext(), SelectQueryOptions().analyze()).getSampleBlock(); - ActionsDAG::makeConvertingActions( - input_columns.getColumnsWithTypeAndName(), - view_output_columns, - ActionsDAG::MatchColumnsMode::Name); + StoragePtr to_table = 
DatabaseCatalog::instance().getTable( + {create.to_table_id.database_name, create.to_table_id.table_name, create.to_table_id.uuid}, + getContext() + ); + + Block to_columns = to_table->getInMemoryMetadataPtr()->getSampleBlock(); + + ColumnsWithTypeAndName output_columns; + for (const auto & column : input_block) + output_columns.push_back(to_columns.findByName(column.name)->cloneEmpty()); ActionsDAG::makeConvertingActions( - view_output_columns, - to_output_columns.getColumnsWithTypeAndName(), - ActionsDAG::MatchColumnsMode::Name); + input_block.getColumnsWithTypeAndName(), + output_columns, + ActionsDAG::MatchColumnsMode::Name + ); } DatabasePtr database; From 1b4cebcfb6f318d9ca98a9d3aae776a6ec2781ed Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Fri, 11 Mar 2022 10:04:35 +0000 Subject: [PATCH 022/110] Add new/delete overloads --- src/Common/Concepts.h | 14 +++++++ src/Common/memory.h | 77 +++++++++++++++++++++++++++++++-------- src/Common/new_delete.cpp | 54 ++++++++++++++++++++++++++- 3 files changed, 127 insertions(+), 18 deletions(-) create mode 100644 src/Common/Concepts.h diff --git a/src/Common/Concepts.h b/src/Common/Concepts.h new file mode 100644 index 00000000000..b1bf591024d --- /dev/null +++ b/src/Common/Concepts.h @@ -0,0 +1,14 @@ +#pragma once + +#include + +namespace DB +{ + +template +concept OptionalArgument = requires(T &&...) +{ + requires(sizeof...(T) == 0 || sizeof...(T) == 1); +}; + +} diff --git a/src/Common/memory.h b/src/Common/memory.h index 41b10a57db4..3b819f295ca 100644 --- a/src/Common/memory.h +++ b/src/Common/memory.h @@ -1,8 +1,11 @@ #pragma once +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wold-style-cast" #include #include +#include #include #include @@ -14,13 +17,24 @@ # include #endif - namespace Memory { -inline ALWAYS_INLINE void * newImpl(std::size_t size) +inline ALWAYS_INLINE size_t alignToSizeT(std::align_val_t align) noexcept { - auto * ptr = malloc(size); + return static_cast(align); +} + +template ... TAlign> +requires DB::OptionalArgument +inline ALWAYS_INLINE void * newImpl(std::size_t size, TAlign... align) +{ + void * ptr = nullptr; + if constexpr (sizeof...(TAlign) == 1) + ptr = aligned_alloc(alignToSizeT(align...), size); + else + ptr = malloc(size); + if (likely(ptr != nullptr)) return ptr; @@ -33,6 +47,11 @@ inline ALWAYS_INLINE void * newNoExept(std::size_t size) noexcept return malloc(size); } +inline ALWAYS_INLINE void * newNoExept(std::size_t size, std::align_val_t align) noexcept +{ + return aligned_alloc(static_cast(align), size); +} + inline ALWAYS_INLINE void deleteImpl(void * ptr) noexcept { free(ptr); @@ -40,17 +59,24 @@ inline ALWAYS_INLINE void deleteImpl(void * ptr) noexcept #if USE_JEMALLOC -inline ALWAYS_INLINE void deleteSized(void * ptr, std::size_t size) noexcept +template ... TAlign> +requires DB::OptionalArgument +inline ALWAYS_INLINE void deleteSized(void * ptr, std::size_t size, TAlign... align) noexcept { if (unlikely(ptr == nullptr)) return; - sdallocx(ptr, size, 0); + if constexpr (sizeof...(TAlign) == 1) + sdallocx(ptr, size, MALLOCX_ALIGN(alignToSizeT(align...))); + else + sdallocx(ptr, size, 0); } #else -inline ALWAYS_INLINE void deleteSized(void * ptr, std::size_t size [[maybe_unused]]) noexcept +template ... TAlign> +requires DB::OptionalArgument +inline ALWAYS_INLINE void deleteSized(void * ptr, std::size_t size [[maybe_unused]], TAlign... 
/* align */) noexcept { free(ptr); } @@ -58,13 +84,14 @@ inline ALWAYS_INLINE void deleteSized(void * ptr, std::size_t size [[maybe_unuse #endif #if defined(OS_LINUX) -# include +# include #elif defined(OS_DARWIN) -# include +# include #endif - -inline ALWAYS_INLINE size_t getActualAllocationSize(size_t size) +template ... TAlign> +requires DB::OptionalArgument +inline ALWAYS_INLINE size_t getActualAllocationSize(size_t size, TAlign... align) { size_t actual_size = size; @@ -72,26 +99,41 @@ inline ALWAYS_INLINE size_t getActualAllocationSize(size_t size) /// The nallocx() function allocates no memory, but it performs the same size computation as the mallocx() function /// @note je_mallocx() != je_malloc(). It's expected they don't differ much in allocation logic. if (likely(size != 0)) - actual_size = nallocx(size, 0); + { + if constexpr (sizeof...(TAlign) == 1) + actual_size = nallocx(size, MALLOCX_ALIGN(alignToSizeT(align...))); + else + actual_size = nallocx(size, 0); + } #endif return actual_size; } -inline ALWAYS_INLINE void trackMemory(std::size_t size) +template ... TAlign> +requires DB::OptionalArgument +inline ALWAYS_INLINE void trackMemory(std::size_t size, TAlign... align) { - std::size_t actual_size = getActualAllocationSize(size); + std::size_t actual_size = getActualAllocationSize(size, align...); CurrentMemoryTracker::allocNoThrow(actual_size); } -inline ALWAYS_INLINE void untrackMemory(void * ptr [[maybe_unused]], std::size_t size [[maybe_unused]] = 0) noexcept +template ... TAlign> +requires DB::OptionalArgument +inline ALWAYS_INLINE void untrackMemory(void * ptr [[maybe_unused]], std::size_t size [[maybe_unused]] = 0, TAlign... align) noexcept { try { #if USE_JEMALLOC + /// @note It's also possible to use je_malloc_usable_size() here. if (likely(ptr != nullptr)) - CurrentMemoryTracker::free(sallocx(ptr, 0)); + { + if constexpr (sizeof...(TAlign) == 1) + CurrentMemoryTracker::free(sallocx(ptr, MALLOCX_ALIGN(alignToSizeT(align...)))); + else + CurrentMemoryTracker::free(sallocx(ptr, 0)); + } #else if (size) CurrentMemoryTracker::free(size); @@ -103,7 +145,10 @@ inline ALWAYS_INLINE void untrackMemory(void * ptr [[maybe_unused]], std::size_t #endif } catch (...) - {} + { + } } } + +#pragma GCC diagnostic pop diff --git a/src/Common/new_delete.cpp b/src/Common/new_delete.cpp index 8908d140b90..7b4bff04185 100644 --- a/src/Common/new_delete.cpp +++ b/src/Common/new_delete.cpp @@ -1,6 +1,7 @@ -#include -#include +#include #include +#include +#include #if defined(OS_DARWIN) && (USE_JEMALLOC) /// In case of OSX jemalloc register itself as a default zone allocator. 
@@ -53,12 +54,24 @@ void * operator new(std::size_t size) return Memory::newImpl(size); } +void * operator new(std::size_t size, std::align_val_t align) +{ + Memory::trackMemory(size, align); + return Memory::newImpl(size, align); +} + void * operator new[](std::size_t size) { Memory::trackMemory(size); return Memory::newImpl(size); } +void * operator new[](std::size_t size, std::align_val_t align) +{ + Memory::trackMemory(size, align); + return Memory::newImpl(size, align); +} + void * operator new(std::size_t size, const std::nothrow_t &) noexcept { Memory::trackMemory(size); @@ -71,6 +84,18 @@ void * operator new[](std::size_t size, const std::nothrow_t &) noexcept return Memory::newNoExept(size); } +void * operator new(std::size_t size, std::align_val_t align, const std::nothrow_t &) noexcept +{ + Memory::trackMemory(size, align); + return Memory::newNoExept(size, align); +} + +void * operator new[](std::size_t size, std::align_val_t align, const std::nothrow_t &) noexcept +{ + Memory::trackMemory(size, align); + return Memory::newNoExept(size, align); +} + /// delete /// C++17 std 21.6.2.1 (11) @@ -81,26 +106,51 @@ void * operator new[](std::size_t size, const std::nothrow_t &) noexcept /// It's unspecified whether size-aware or size-unaware version is called when deleting objects of /// incomplete type and arrays of non-class and trivially-destructible class types. + void operator delete(void * ptr) noexcept { Memory::untrackMemory(ptr); Memory::deleteImpl(ptr); } +void operator delete(void * ptr, std::align_val_t align) noexcept +{ + Memory::untrackMemory(ptr, 0, align); + Memory::deleteImpl(ptr); +} + void operator delete[](void * ptr) noexcept { Memory::untrackMemory(ptr); Memory::deleteImpl(ptr); } +void operator delete[](void * ptr, std::align_val_t align) noexcept +{ + Memory::untrackMemory(ptr, 0, align); + Memory::deleteImpl(ptr); +} + void operator delete(void * ptr, std::size_t size) noexcept { Memory::untrackMemory(ptr, size); Memory::deleteSized(ptr, size); } +void operator delete(void * ptr, std::size_t size, std::align_val_t align) noexcept +{ + Memory::untrackMemory(ptr, size, align); + Memory::deleteSized(ptr, size, align); +} + void operator delete[](void * ptr, std::size_t size) noexcept { Memory::untrackMemory(ptr, size); Memory::deleteSized(ptr, size); } + +void operator delete[](void * ptr, std::size_t size, std::align_val_t align) noexcept +{ + Memory::untrackMemory(ptr, size, align); + Memory::deleteSized(ptr, size, align); +} From 586853960530dad3dfe56ae07323b00cb679e811 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Tue, 12 Apr 2022 10:17:39 -0400 Subject: [PATCH 023/110] check only if TO table exists --- src/Interpreters/InterpreterCreateQuery.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 9a7f618696a..5ffbf8963b7 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -1072,7 +1072,12 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create) TableProperties properties = getTablePropertiesAndNormalizeCreateQuery(create); /// Check type compatible for materialized dest table and select columns - if (create.select && create.is_materialized_view && create.to_table_id) + if (create.select && create.is_materialized_view && create.to_table_id && + DatabaseCatalog::instance().isTableExist( + {create.to_table_id.database_name, create.to_table_id.table_name, 
create.to_table_id.uuid}, + getContext() + ) + ) { Block input_block = InterpreterSelectWithUnionQuery( create.select->clone(), getContext(), SelectQueryOptions().analyze()).getSampleBlock(); From a4f56f333052fa1ffa8d76d25087d98e5a54ed68 Mon Sep 17 00:00:00 2001 From: Julian Gilyadov Date: Mon, 4 Apr 2022 14:23:34 +0200 Subject: [PATCH 024/110] Throw exception when file cant be executed instead of displaying success --- src/Common/filesystemHelpers.cpp | 13 ++++++++++++- src/Common/filesystemHelpers.h | 2 ++ src/Dictionaries/ExecutableDictionarySource.cpp | 11 +++++++---- src/Dictionaries/ExecutablePoolDictionarySource.cpp | 10 +++++++--- .../UserDefinedExecutableFunctionFactory.cpp | 10 +++++++--- src/Storages/StorageExecutable.cpp | 12 ++++++++---- 6 files changed, 43 insertions(+), 15 deletions(-) diff --git a/src/Common/filesystemHelpers.cpp b/src/Common/filesystemHelpers.cpp index 6c5813ab29c..ca06b21ab3a 100644 --- a/src/Common/filesystemHelpers.cpp +++ b/src/Common/filesystemHelpers.cpp @@ -234,6 +234,11 @@ bool createFile(const std::string & path) DB::throwFromErrnoWithPath("Cannot create file: " + path, path, DB::ErrorCodes::CANNOT_CREATE_FILE); } +bool exists(const std::string & path) +{ + return faccessat(AT_FDCWD, path.c_str(), F_OK, AT_EACCESS) == 0; +} + bool canRead(const std::string & path) { struct stat st; @@ -249,7 +254,6 @@ bool canRead(const std::string & path) DB::throwFromErrnoWithPath("Cannot check read access to file: " + path, path, DB::ErrorCodes::PATH_ACCESS_DENIED); } - bool canWrite(const std::string & path) { struct stat st; @@ -265,6 +269,13 @@ bool canWrite(const std::string & path) DB::throwFromErrnoWithPath("Cannot check write access to file: " + path, path, DB::ErrorCodes::PATH_ACCESS_DENIED); } +bool canExecute(const std::string & path) +{ + if (exists(path)) + return faccessat(AT_FDCWD, path.c_str(), X_OK, AT_EACCESS) == 0; + DB::throwFromErrnoWithPath("Cannot check execute access to file: " + path, path, DB::ErrorCodes::PATH_ACCESS_DENIED); +} + time_t getModificationTime(const std::string & path) { struct stat st; diff --git a/src/Common/filesystemHelpers.h b/src/Common/filesystemHelpers.h index 27041a2f0a8..b15073796a0 100644 --- a/src/Common/filesystemHelpers.h +++ b/src/Common/filesystemHelpers.h @@ -70,8 +70,10 @@ namespace FS { bool createFile(const std::string & path); +bool exists(const std::string & path); bool canRead(const std::string & path); bool canWrite(const std::string & path); +bool canExecute(const std::string & path); time_t getModificationTime(const std::string & path); Poco::Timestamp getModificationTimestamp(const std::string & path); diff --git a/src/Dictionaries/ExecutableDictionarySource.cpp b/src/Dictionaries/ExecutableDictionarySource.cpp index 7a3550e7284..762e26cd2e2 100644 --- a/src/Dictionaries/ExecutableDictionarySource.cpp +++ b/src/Dictionaries/ExecutableDictionarySource.cpp @@ -7,7 +7,6 @@ #include #include #include -#include #include #include @@ -15,12 +14,10 @@ #include #include -#include #include #include #include -#include namespace DB @@ -51,12 +48,18 @@ namespace command, user_scripts_path); - if (!std::filesystem::exists(std::filesystem::path(script_path))) + if (!FS::exists(script_path)) throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Executable file {} does not exist inside user scripts folder {}", command, user_scripts_path); + if (!FS::canExecute(script_path)) + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, + "Executable file {} is not executable inside user scripts folder {}", + command, + 
user_scripts_path); + command = std::move(script_path); } diff --git a/src/Dictionaries/ExecutablePoolDictionarySource.cpp b/src/Dictionaries/ExecutablePoolDictionarySource.cpp index 62598c966e5..a7c152c02f0 100644 --- a/src/Dictionaries/ExecutablePoolDictionarySource.cpp +++ b/src/Dictionaries/ExecutablePoolDictionarySource.cpp @@ -7,7 +7,6 @@ #include #include #include -#include #include #include @@ -20,7 +19,6 @@ #include #include - namespace DB { @@ -113,12 +111,18 @@ Pipe ExecutablePoolDictionarySource::getStreamForBlock(const Block & block) command, user_scripts_path); - if (!std::filesystem::exists(std::filesystem::path(script_path))) + if (!FS::exists(script_path)) throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Executable file {} does not exist inside user scripts folder {}", command, user_scripts_path); + if (!FS::canExecute(script_path)) + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, + "Executable file {} is not executable inside user scripts folder {}", + command, + user_scripts_path); + command = std::move(script_path); } diff --git a/src/Interpreters/UserDefinedExecutableFunctionFactory.cpp b/src/Interpreters/UserDefinedExecutableFunctionFactory.cpp index d3a38f42e21..5b5c7911735 100644 --- a/src/Interpreters/UserDefinedExecutableFunctionFactory.cpp +++ b/src/Interpreters/UserDefinedExecutableFunctionFactory.cpp @@ -4,8 +4,6 @@ #include -#include - #include #include #include @@ -78,12 +76,18 @@ public: command, user_scripts_path); - if (!std::filesystem::exists(std::filesystem::path(script_path))) + if (!FS::exists(script_path)) throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Executable file {} does not exist inside user scripts folder {}", command, user_scripts_path); + if (!FS::canExecute(script_path)) + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, + "Executable file {} is not executable inside user scripts folder {}", + command, + user_scripts_path); + command = std::move(script_path); } diff --git a/src/Storages/StorageExecutable.cpp b/src/Storages/StorageExecutable.cpp index d9e97f98d56..577b4cab585 100644 --- a/src/Storages/StorageExecutable.cpp +++ b/src/Storages/StorageExecutable.cpp @@ -1,21 +1,19 @@ #include #include +#include #include -#include #include #include -#include #include #include #include #include -#include #include #include #include @@ -123,12 +121,18 @@ Pipe StorageExecutable::read( script_name, user_scripts_path); - if (!std::filesystem::exists(std::filesystem::path(script_path))) + if (!FS::exists(script_path)) throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Executable file {} does not exist inside user scripts folder {}", script_name, user_scripts_path); + if (!FS::canExecute(script_path)) + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, + "Executable file {} is not executable inside user scripts folder {}", + script_name, + user_scripts_path); + Pipes inputs; inputs.reserve(input_queries.size()); From 0679d4814dd37b7c942896336414ab9e3d2f3c4a Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Tue, 12 Apr 2022 18:58:40 +0000 Subject: [PATCH 025/110] allow to convert empty string to objects --- src/DataTypes/ObjectUtils.cpp | 5 ----- .../Serializations/SerializationObject.cpp | 7 +++++++ .../01825_type_json_empty_string.reference | 4 ++++ .../01825_type_json_empty_string.sql | 10 ++++++++++ .../01825_type_json_ephemeral.reference | 1 + .../0_stateless/01825_type_json_ephemeral.sql | 17 +++++++++++++++++ 6 files changed, 39 insertions(+), 5 deletions(-) create mode 100644 tests/queries/0_stateless/01825_type_json_empty_string.reference create 
mode 100644 tests/queries/0_stateless/01825_type_json_empty_string.sql create mode 100644 tests/queries/0_stateless/01825_type_json_ephemeral.reference create mode 100644 tests/queries/0_stateless/01825_type_json_ephemeral.sql diff --git a/src/DataTypes/ObjectUtils.cpp b/src/DataTypes/ObjectUtils.cpp index cbabc71a965..629cd9c28b2 100644 --- a/src/DataTypes/ObjectUtils.cpp +++ b/src/DataTypes/ObjectUtils.cpp @@ -139,11 +139,6 @@ void convertObjectsToTuples(Block & block, const NamesAndTypesList & extended_st if (!isObject(column.type)) continue; - if (!isObject(column.type)) - throw Exception(ErrorCodes::TYPE_MISMATCH, - "Type for column '{}' mismatch in columns list and in block. In list: {}, in block: {}", - column.name, column.type->getName(), column.type->getName()); - const auto & column_object = assert_cast(*column.column); const auto & subcolumns = column_object.getSubcolumns(); diff --git a/src/DataTypes/Serializations/SerializationObject.cpp b/src/DataTypes/Serializations/SerializationObject.cpp index 64d79d107e1..82f9552fb86 100644 --- a/src/DataTypes/Serializations/SerializationObject.cpp +++ b/src/DataTypes/Serializations/SerializationObject.cpp @@ -136,10 +136,17 @@ void SerializationObject::deserializeTextImpl(IColumn & column, Reader & reader(buf); std::optional result; + /// Treat empty string as an empty object + /// for better CAST from String to Object. + if (!buf.empty()) { auto parser = parsers_pool.get([] { return new Parser; }); result = parser->parse(buf.data(), buf.size()); } + else + { + result = ParseResult{}; + } if (!result) throw Exception(ErrorCodes::INCORRECT_DATA, "Cannot parse object"); diff --git a/tests/queries/0_stateless/01825_type_json_empty_string.reference b/tests/queries/0_stateless/01825_type_json_empty_string.reference new file mode 100644 index 00000000000..7e3d2e54336 --- /dev/null +++ b/tests/queries/0_stateless/01825_type_json_empty_string.reference @@ -0,0 +1,4 @@ +1 (0,'') +2 (1,'v1') +3 (0,'') +4 (2,'') diff --git a/tests/queries/0_stateless/01825_type_json_empty_string.sql b/tests/queries/0_stateless/01825_type_json_empty_string.sql new file mode 100644 index 00000000000..5af3af51208 --- /dev/null +++ b/tests/queries/0_stateless/01825_type_json_empty_string.sql @@ -0,0 +1,10 @@ +DROP TABLE IF EXISTS t_json_empty_str; +SET allow_experimental_object_type = 1; + +CREATE TABLE t_json_empty_str(id UInt32, o JSON) ENGINE = Memory; + +INSERT INTO t_json_empty_str VALUES (1, ''), (2, '{"k1": 1, "k2": "v1"}'), (3, '{}'), (4, '{"k1": 2}'); + +SELECT * FROM t_json_empty_str ORDER BY id; + +DROP TABLE t_json_empty_str; diff --git a/tests/queries/0_stateless/01825_type_json_ephemeral.reference b/tests/queries/0_stateless/01825_type_json_ephemeral.reference new file mode 100644 index 00000000000..67d2a24d3fe --- /dev/null +++ b/tests/queries/0_stateless/01825_type_json_ephemeral.reference @@ -0,0 +1 @@ +PushEvent some-repo (('https://avatars.githubusercontent.com/u/123213213?','github-actions','',123123123,'github-actions[bot]','https://api.github.com/users/github-actions[bot]'),'2022-01-04 07:00:00',(1001001010101,'some-repo','https://api.github.com/repos/some-repo'),'PushEvent') diff --git a/tests/queries/0_stateless/01825_type_json_ephemeral.sql b/tests/queries/0_stateless/01825_type_json_ephemeral.sql new file mode 100644 index 00000000000..276fdd2380a --- /dev/null +++ b/tests/queries/0_stateless/01825_type_json_ephemeral.sql @@ -0,0 +1,17 @@ +SET allow_experimental_object_type = 1; + +DROP TABLE IF EXISTS t_github_json; + +CREATE table 
t_github_json +( + event_type LowCardinality(String) DEFAULT JSONExtractString(message_raw, 'type'), + repo_name LowCardinality(String) DEFAULT JSONExtractString(message_raw, 'repo', 'name'), + message JSON DEFAULT message_raw, + message_raw String EPHEMERAL +) ENGINE = MergeTree ORDER BY (event_type, repo_name); + +INSERT INTO t_github_json (message_raw) FORMAT JSONEachRow {"message_raw": "{\"type\":\"PushEvent\", \"created_at\": \"2022-01-04 07:00:00\", \"actor\":{\"avatar_url\":\"https://avatars.githubusercontent.com/u/123213213?\",\"display_login\":\"github-actions\",\"gravatar_id\":\"\",\"id\":123123123,\"login\":\"github-actions[bot]\",\"url\":\"https://api.github.com/users/github-actions[bot]\"},\"repo\":{\"id\":1001001010101,\"name\":\"some-repo\",\"url\":\"https://api.github.com/repos/some-repo\"}}"} + +SELECT * FROM t_github_json ORDER BY event_type, repo_name; + +DROP TABLE t_github_json; From 3d52afed71a51a29c484d9c422eca85b46f30e35 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Tue, 12 Apr 2022 15:10:11 -0400 Subject: [PATCH 026/110] only check columns sets intersection, some refactoring --- src/Interpreters/InterpreterCreateQuery.cpp | 43 +++++++++++---------- 1 file changed, 23 insertions(+), 20 deletions(-) diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 5ffbf8963b7..8e13ce1d487 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -1072,32 +1072,35 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create) TableProperties properties = getTablePropertiesAndNormalizeCreateQuery(create); /// Check type compatible for materialized dest table and select columns - if (create.select && create.is_materialized_view && create.to_table_id && - DatabaseCatalog::instance().isTableExist( - {create.to_table_id.database_name, create.to_table_id.table_name, create.to_table_id.uuid}, - getContext() - ) - ) + if (create.select && create.is_materialized_view && create.to_table_id) { - Block input_block = InterpreterSelectWithUnionQuery( - create.select->clone(), getContext(), SelectQueryOptions().analyze()).getSampleBlock(); - - StoragePtr to_table = DatabaseCatalog::instance().getTable( + if (StoragePtr to_table = DatabaseCatalog::instance().tryGetTable( {create.to_table_id.database_name, create.to_table_id.table_name, create.to_table_id.uuid}, getContext() - ); + )) + { + Block input_block = InterpreterSelectWithUnionQuery( + create.select->clone(), getContext(), SelectQueryOptions().analyze()).getSampleBlock(); - Block to_columns = to_table->getInMemoryMetadataPtr()->getSampleBlock(); + Block output_block = to_table->getInMemoryMetadataPtr()->getSampleBlock(); - ColumnsWithTypeAndName output_columns; - for (const auto & column : input_block) - output_columns.push_back(to_columns.findByName(column.name)->cloneEmpty()); + ColumnsWithTypeAndName input_columns; + ColumnsWithTypeAndName output_columns; + for (const auto & input_column : input_block) + { + if (const auto * output_column = output_block.findByName(input_column.name)) + { + input_columns.push_back(input_column.cloneEmpty()); + output_columns.push_back(output_column->cloneEmpty()); + } + } - ActionsDAG::makeConvertingActions( - input_block.getColumnsWithTypeAndName(), - output_columns, - ActionsDAG::MatchColumnsMode::Name - ); + ActionsDAG::makeConvertingActions( + input_columns, + output_columns, + ActionsDAG::MatchColumnsMode::Position + ); + } } DatabasePtr database; From 
c675e2187292fb06a55c455040338f76b2793404 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Tue, 12 Apr 2022 15:49:08 -0400 Subject: [PATCH 027/110] tests updated --- .../0_stateless/01880_materialized_view_to_table_type_check.sql | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/queries/0_stateless/01880_materialized_view_to_table_type_check.sql b/tests/queries/0_stateless/01880_materialized_view_to_table_type_check.sql index 342ef08bc89..2da9884ba8e 100644 --- a/tests/queries/0_stateless/01880_materialized_view_to_table_type_check.sql +++ b/tests/queries/0_stateless/01880_materialized_view_to_table_type_check.sql @@ -8,8 +8,6 @@ CREATE TABLE test(`id` Int32, `pv` AggregateFunction(sum, Int32)) ENGINE = Aggre CREATE MATERIALIZED VIEW test_mv to test(`id` Int32, `pv` AggregateFunction(sum, Int32)) as SELECT id, sumState(1) as pv from test_input group by id; -- { serverError 70 } -DROP VIEW test_mv; - INSERT INTO test_input SELECT toInt32(number % 1000) AS id FROM numbers(10); select '----------test--------:'; select * from test; From b066585cae14bc7f99a64653b8555efc232ef72d Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Tue, 12 Apr 2022 22:12:58 +0200 Subject: [PATCH 028/110] Apply suggestions from code review Co-authored-by: Vladimir C --- tests/queries/0_stateless/01825_type_json_empty_string.sql | 2 ++ tests/queries/0_stateless/01825_type_json_ephemeral.sql | 2 ++ 2 files changed, 4 insertions(+) diff --git a/tests/queries/0_stateless/01825_type_json_empty_string.sql b/tests/queries/0_stateless/01825_type_json_empty_string.sql index 5af3af51208..b3f2a7cb120 100644 --- a/tests/queries/0_stateless/01825_type_json_empty_string.sql +++ b/tests/queries/0_stateless/01825_type_json_empty_string.sql @@ -1,3 +1,5 @@ +-- Tags: no-fasttest + DROP TABLE IF EXISTS t_json_empty_str; SET allow_experimental_object_type = 1; diff --git a/tests/queries/0_stateless/01825_type_json_ephemeral.sql b/tests/queries/0_stateless/01825_type_json_ephemeral.sql index 276fdd2380a..4485510e419 100644 --- a/tests/queries/0_stateless/01825_type_json_ephemeral.sql +++ b/tests/queries/0_stateless/01825_type_json_ephemeral.sql @@ -1,3 +1,5 @@ +-- Tags: no-fasttest + SET allow_experimental_object_type = 1; DROP TABLE IF EXISTS t_github_json; From 835f752a1ae9135ac35ff7da98a6cba0d34c4298 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Wed, 13 Apr 2022 06:19:59 +0000 Subject: [PATCH 029/110] Add maybe_unused for align arguments --- src/Common/memory.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Common/memory.h b/src/Common/memory.h index 3b819f295ca..8a6b98933fb 100644 --- a/src/Common/memory.h +++ b/src/Common/memory.h @@ -91,7 +91,7 @@ inline ALWAYS_INLINE void deleteSized(void * ptr, std::size_t size [[maybe_unuse template ... TAlign> requires DB::OptionalArgument -inline ALWAYS_INLINE size_t getActualAllocationSize(size_t size, TAlign... align) +inline ALWAYS_INLINE size_t getActualAllocationSize(size_t size, TAlign... align [[maybe_unused]]) { size_t actual_size = size; @@ -120,7 +120,7 @@ inline ALWAYS_INLINE void trackMemory(std::size_t size, TAlign... align) template ... TAlign> requires DB::OptionalArgument -inline ALWAYS_INLINE void untrackMemory(void * ptr [[maybe_unused]], std::size_t size [[maybe_unused]] = 0, TAlign... align) noexcept +inline ALWAYS_INLINE void untrackMemory(void * ptr [[maybe_unused]], std::size_t size [[maybe_unused]] = 0, TAlign... 
align [[maybe_unused]]) noexcept { try { From 1b267462d836d92a5b84449accf8a1781165d42f Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Wed, 13 Apr 2022 11:27:47 +0200 Subject: [PATCH 030/110] Update CachedReadBufferFromRemoteFS.cpp --- src/Disks/IO/CachedReadBufferFromRemoteFS.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp b/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp index d39d54970dc..1cc819a7443 100644 --- a/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp +++ b/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp @@ -336,7 +336,7 @@ SeekableReadBufferPtr CachedReadBufferFromRemoteFS::getImplementationBuffer(File size_t seek_offset = file_offset_of_buffer_end - range.left; if (file_offset_of_buffer_end < range.left) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected {} > {}", file_offset_of_buffer_end, range.left); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Failed invariant. Expected {} > {}", file_offset_of_buffer_end, range.left); read_buffer_for_file_segment->seek(seek_offset, SEEK_SET); From a611566266a33a2cd593aafc8f3c7bd559c5eabb Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Wed, 13 Apr 2022 11:33:15 +0200 Subject: [PATCH 031/110] Update FileCache.h --- src/Common/FileCache.h | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/Common/FileCache.h b/src/Common/FileCache.h index b1d46569949..01b639ea1c9 100644 --- a/src/Common/FileCache.h +++ b/src/Common/FileCache.h @@ -72,6 +72,15 @@ public: */ virtual FileSegmentsHolder getOrSet(const Key & key, size_t offset, size_t size) = 0; + /** + * Segments in returned list are ordered in ascending order and represent a full contiguous + * interval (no holes). Each segment in returned list has state: DOWNLOADED, DOWNLOADING or EMPTY. + * + * If file segment has state EMPTY, then it is also marked as "detached". E.g. it is "detached" + * from cache (not owned by cache), and as a result will never change it's state and will be destructed + * with the destruction of the holder, while in getOrSet() EMPTY file segments can eventually change + * it's state (and become DOWNLOADED). + */ virtual FileSegmentsHolder get(const Key & key, size_t offset, size_t size) = 0; virtual FileSegmentsHolder setDownloading(const Key & key, size_t offset, size_t size) = 0; From 053892777f2202d604a1d201ea0fcb97f0713489 Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Wed, 13 Apr 2022 11:35:12 +0200 Subject: [PATCH 032/110] Update FileSegment.cpp --- src/Common/FileSegment.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/Common/FileSegment.cpp b/src/Common/FileSegment.cpp index 92a478f3476..4f144362ed9 100644 --- a/src/Common/FileSegment.cpp +++ b/src/Common/FileSegment.cpp @@ -667,6 +667,9 @@ FileSegmentsHolder::~FileSegmentsHolder() if (file_segment->detached) { + /// This file segment is not owned by cache, so it will be destructed + /// at this point, therefore no completion required. 
+ assert(file_segment->state() == State::EMPTY); file_segment_it = file_segments.erase(current_file_segment_it); continue; } From 44ecfd925bf31a24fec382baa4d46e0c94489b15 Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Wed, 13 Apr 2022 11:35:46 +0200 Subject: [PATCH 033/110] Update FileSegment.cpp --- src/Common/FileSegment.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Common/FileSegment.cpp b/src/Common/FileSegment.cpp index 4f144362ed9..50a1ecb4a6c 100644 --- a/src/Common/FileSegment.cpp +++ b/src/Common/FileSegment.cpp @@ -669,7 +669,7 @@ FileSegmentsHolder::~FileSegmentsHolder() { /// This file segment is not owned by cache, so it will be destructed /// at this point, therefore no completion required. - assert(file_segment->state() == State::EMPTY); + assert(file_segment->state() == FileSegment::State::EMPTY); file_segment_it = file_segments.erase(current_file_segment_it); continue; } From 7b0f1fbb889bff72a7e5013d967d7ac7053c49b4 Mon Sep 17 00:00:00 2001 From: Michael Lex Date: Wed, 13 Apr 2022 12:05:51 +0200 Subject: [PATCH 034/110] Extend TLS documentation. Information about used defaults. Better explanation of the extendedVerification option (which enables hostname verification) Better explanation of the caConfig option and how files vs. paths are treated. Mention TLS config in CLI documentation and link to server-side settings (which are the same). --- docs/en/interfaces/cli.md | 9 ++- .../settings.md | 56 +++++++++---------- 2 files changed, 35 insertions(+), 30 deletions(-) diff --git a/docs/en/interfaces/cli.md b/docs/en/interfaces/cli.md index eaf7a96ce42..a252f55de2c 100644 --- a/docs/en/interfaces/cli.md +++ b/docs/en/interfaces/cli.md @@ -124,7 +124,7 @@ You can pass parameters to `clickhouse-client` (all parameters have a default va - `--time, -t` – If specified, print the query execution time to ‘stderr’ in non-interactive mode. - `--stacktrace` – If specified, also print the stack trace if an exception occurs. - `--config-file` – The name of the configuration file. -- `--secure` – If specified, will connect to server over secure connection. +- `--secure` – If specified, will connect to server over secure connection (TLS). You might need to configure your CA certificates in the [configuration file](#configuration_files). The available configuration settings are the same as for [server-side TLS configuration](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-openssl). - `--history_file` — Path to a file containing command history. - `--param_` — Value for a [query with parameters](#cli-queries-with-parameters). - `--hardware-utilization` — Print hardware utilization information in progress bar. @@ -148,7 +148,12 @@ Example of a config file: username password - False + true + + + /etc/ssl/cert.pem + + ``` diff --git a/docs/en/operations/server-configuration-parameters/settings.md b/docs/en/operations/server-configuration-parameters/settings.md index 301b348925f..fb53fd38fe3 100644 --- a/docs/en/operations/server-configuration-parameters/settings.md +++ b/docs/en/operations/server-configuration-parameters/settings.md @@ -366,12 +366,12 @@ Opens `https://tabix.io/` when accessing `http://localhost: http_port`.
]]>
-``` +``` ## hsts_max_age {#hsts-max-age} - -Expired time for HSTS in seconds. The default value is 0 means clickhouse disabled HSTS. If you set a positive number, the HSTS will be enabled and the max-age is the number you set. - -**Example** + +Expired time for HSTS in seconds. The default value is 0 means clickhouse disabled HSTS. If you set a positive number, the HSTS will be enabled and the max-age is the number you set. + +**Example** ```xml 600000 @@ -468,7 +468,7 @@ To enable authentication, set `interserver_http_credentials.allow_empty` to `tru After configuring all replicas set `allow_empty` to `false` or remove this setting. It makes authentication with new credentials mandatory. -To change existing credentials, move the username and the password to `interserver_http_credentials.old` section and update `user` and `password` with new values. At this point the server uses new credentials to connect to other replicas and accepts connections with either new or old credentials. +To change existing credentials, move the username and the password to `interserver_http_credentials.old` section and update `user` and `password` with new values. At this point the server uses new credentials to connect to other replicas and accepts connections with either new or old credentials. ``` xml @@ -834,7 +834,7 @@ The value 0 means that you can delete all tables without any restrictions. ClickHouse uses threads from the Global Thread pool to process queries. If there is no idle thread to process a query, then a new thread is created in the pool. `max_thread_pool_size` limits the maximum number of threads in the pool. -Possible values: +Possible values: - Positive integer. @@ -850,7 +850,7 @@ Default value: `10000`. If the number of **idle** threads in the Global Thread pool is greater than `max_thread_pool_free_size`, then ClickHouse releases resources occupied by some threads and the pool size is decreased. Threads can be created again if necessary. -Possible values: +Possible values: - Positive integer. @@ -866,7 +866,7 @@ Default value: `1000`. The maximum number of jobs that can be scheduled on the Global Thread pool. Increasing queue size leads to larger memory usage. It is recommended to keep this value equal to [max_thread_pool_size](#max-thread-pool-size). -Possible values: +Possible values: - Positive integer. @@ -941,30 +941,30 @@ For more information, see the MergeTreeSettings.h header file. SSL client/server configuration. -Support for SSL is provided by the `libpoco` library. The interface is described in the file [SSLManager.h](https://github.com/ClickHouse-Extras/poco/blob/master/NetSSL_OpenSSL/include/Poco/Net/SSLManager.h) +Support for SSL is provided by the `libpoco` library. The available configuration options are explained in [SSLManager.h](https://github.com/ClickHouse-Extras/poco/blob/master/NetSSL_OpenSSL/include/Poco/Net/SSLManager.h). Default values can be found in [SSLManager.cpp](https://github.com/ClickHouse-Extras/poco/blob/master/NetSSL_OpenSSL/src/SSLManager.cpp). Keys for server/client settings: - privateKeyFile – The path to the file with the secret key of the PEM certificate. The file may contain a key and certificate at the same time. - certificateFile – The path to the client/server certificate file in PEM format. You can omit it if `privateKeyFile` contains the certificate. -- caConfig – The path to the file or directory that contains trusted root certificates. -- verificationMode – The method for checking the node’s certificates. 
Details are in the description of the [Context](https://github.com/ClickHouse-Extras/poco/blob/master/NetSSL_OpenSSL/include/Poco/Net/Context.h) class. Possible values: `none`, `relaxed`, `strict`, `once`. -- verificationDepth – The maximum length of the verification chain. Verification will fail if the certificate chain length exceeds the set value. -- loadDefaultCAFile – Indicates that built-in CA certificates for OpenSSL will be used. Acceptable values: `true`, `false`. \| -- cipherList – Supported OpenSSL encryptions. For example: `ALL:!ADH:!LOW:!EXP:!MD5:@STRENGTH`. -- cacheSessions – Enables or disables caching sessions. Must be used in combination with `sessionIdContext`. Acceptable values: `true`, `false`. -- sessionIdContext – A unique set of random characters that the server appends to each generated identifier. The length of the string must not exceed `SSL_MAX_SSL_SESSION_ID_LENGTH`. This parameter is always recommended since it helps avoid problems both if the server caches the session and if the client requested caching. Default value: `${application.name}`. -- sessionCacheSize – The maximum number of sessions that the server caches. Default value: 1024\*20. 0 – Unlimited sessions. -- sessionTimeout – Time for caching the session on the server. -- extendedVerification – Automatically extended verification of certificates after the session ends. Acceptable values: `true`, `false`. -- requireTLSv1 – Require a TLSv1 connection. Acceptable values: `true`, `false`. -- requireTLSv1_1 – Require a TLSv1.1 connection. Acceptable values: `true`, `false`. -- requireTLSv1_2 – Require a TLSv1.2 connection. Acceptable values: `true`, `false`. -- fips – Activates OpenSSL FIPS mode. Supported if the library’s OpenSSL version supports FIPS. -- privateKeyPassphraseHandler – Class (PrivateKeyPassphraseHandler subclass) that requests the passphrase for accessing the private key. For example: ``, `KeyFileHandler`, `test`, ``. -- invalidCertificateHandler – Class (a subclass of CertificateHandler) for verifying invalid certificates. For example: ` ConsoleCertificateHandler ` . -- disableProtocols – Protocols that are not allowed to use. -- preferServerCiphers – Preferred server ciphers on the client. +- caConfig (default: none) – The path to the file or directory that contains trusted CA certificates. If this points to a file, it must be in PEM format and can contain several CA certificates. If this points to a directory, it must contain one .pem file per CA certificate. The filenames are looked up by the CA subject name hash value. Details can be found in the man page of [SSL_CTX_load_verify_locations](https://www.openssl.org/docs/man3.0/man3/SSL_CTX_load_verify_locations.html). +- verificationMode (default: relaxed) – The method for checking the node’s certificates. Details are in the description of the [Context](https://github.com/ClickHouse-Extras/poco/blob/master/NetSSL_OpenSSL/include/Poco/Net/Context.h) class. Possible values: `none`, `relaxed`, `strict`, `once`. +- verificationDepth (default: 9) – The maximum length of the verification chain. Verification will fail if the certificate chain length exceeds the set value. +- loadDefaultCAFile (default: true) – Wether built-in CA certificates for OpenSSL will be used. ClickHouse assumes that builtin CA certificates are in the file `/etc/ssl/cert.pem` (resp. the directory `/etc/ssl/certs`) or in file (resp. directory) specified by the environment variable `SSL_CERT_FILE` (resp. `SSL_CERT_DIR`). 
+- cipherList (default: `ALL:!ADH:!LOW:!EXP:!MD5:@STRENGTH`) - Supported OpenSSL encryptions. +- cacheSessions (default: false) – Enables or disables caching sessions. Must be used in combination with `sessionIdContext`. Acceptable values: `true`, `false`. +- sessionIdContext (default: `${application.name}`) – A unique set of random characters that the server appends to each generated identifier. The length of the string must not exceed `SSL_MAX_SSL_SESSION_ID_LENGTH`. This parameter is always recommended since it helps avoid problems both if the server caches the session and if the client requested caching. Default value: `${application.name}`. +- sessionCacheSize (default: [1024\*20](https://github.com/ClickHouse/boringssl/blob/master/include/openssl/ssl.h#L1978)) – The maximum number of sessions that the server caches. A value of 0 means unlimited sessions. +- sessionTimeout (default: [2h](https://github.com/ClickHouse/boringssl/blob/master/include/openssl/ssl.h#L1926)) – Time for caching the session on the server. +- extendedVerification (default: false) – If enabled, verify that the certificate CN or SAN matches the peer hostname. +- requireTLSv1 (default: false) – Require a TLSv1 connection. Acceptable values: `true`, `false`. +- requireTLSv1_1 (default: false) – Require a TLSv1.1 connection. Acceptable values: `true`, `false`. +- requireTLSv1_2 (default: false) – Require a TLSv1.2 connection. Acceptable values: `true`, `false`. +- fips (default: false) – Activates OpenSSL FIPS mode. Supported if the library’s OpenSSL version supports FIPS. +- privateKeyPassphraseHandler (default: `KeyConsoleHandler`)– Class (PrivateKeyPassphraseHandler subclass) that requests the passphrase for accessing the private key. For example: ``, `KeyFileHandler`, `test`, ``. +- invalidCertificateHandler (default: `ConsoleCertificateHandler`) – Class (a subclass of CertificateHandler) for verifying invalid certificates. For example: ` ConsoleCertificateHandler ` . +- disableProtocols (default: "") – Protocols that are not allowed to use. +- preferServerCiphers (default: false) – Preferred server ciphers on the client. **Example of settings:** From 07cfa980ace109ed333a9185bd7a0a63f73e8903 Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Wed, 13 Apr 2022 12:08:25 +0200 Subject: [PATCH 035/110] Update FileCache.h --- src/Common/FileCache.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Common/FileCache.h b/src/Common/FileCache.h index 01b639ea1c9..983156959de 100644 --- a/src/Common/FileCache.h +++ b/src/Common/FileCache.h @@ -78,7 +78,7 @@ public: * * If file segment has state EMPTY, then it is also marked as "detached". E.g. it is "detached" * from cache (not owned by cache), and as a result will never change it's state and will be destructed - * with the destruction of the holder, while in getOrSet() EMPTY file segments can eventually change + * with the destruction of the holder, while in getOrSet() EMPTY file segments can eventually change * it's state (and become DOWNLOADED). 
*/ virtual FileSegmentsHolder get(const Key & key, size_t offset, size_t size) = 0; From a4d74f975e8ae91513a87c1427b4d8d32f0ba379 Mon Sep 17 00:00:00 2001 From: avogar Date: Wed, 13 Apr 2022 10:20:29 +0000 Subject: [PATCH 036/110] Use atomic instead of mutex + condvar in ParallelReadBuffer --- src/IO/ParallelReadBuffer.cpp | 16 ++++++++-------- src/IO/ParallelReadBuffer.h | 4 +--- 2 files changed, 9 insertions(+), 11 deletions(-) diff --git a/src/IO/ParallelReadBuffer.cpp b/src/IO/ParallelReadBuffer.cpp index 79a16347094..fac6c14c082 100644 --- a/src/IO/ParallelReadBuffer.cpp +++ b/src/IO/ParallelReadBuffer.cpp @@ -205,12 +205,8 @@ bool ParallelReadBuffer::nextImpl() void ParallelReadBuffer::readerThreadFunction(ReadWorkerPtr read_worker) { SCOPE_EXIT({ - std::lock_guard lock{mutex}; - --active_working_reader; - if (active_working_reader == 0) - { - readers_done.notify_all(); - } + if (--active_working_reader == 0) + active_working_reader.notify_all(); }); try @@ -265,8 +261,12 @@ void ParallelReadBuffer::finishAndWait() { emergency_stop = true; - std::unique_lock lock{mutex}; - readers_done.wait(lock, [&] { return active_working_reader == 0; }); + size_t active_readers = active_working_reader.load(); + while (active_readers != 0) + { + active_working_reader.wait(active_readers); + active_readers = active_working_reader.load(); + } } } diff --git a/src/IO/ParallelReadBuffer.h b/src/IO/ParallelReadBuffer.h index eda88969afa..50bec6663ce 100644 --- a/src/IO/ParallelReadBuffer.h +++ b/src/IO/ParallelReadBuffer.h @@ -135,9 +135,7 @@ private: Segment current_segment; size_t max_working_readers; - size_t active_working_reader{0}; - // Triggered when all reader workers are done - std::condition_variable readers_done; + std::atomic_size_t active_working_reader{0}; CallbackRunner schedule; From 7af954cef9428afb68e7de7584ae2e8dd4baa6fd Mon Sep 17 00:00:00 2001 From: tavplubix Date: Wed, 13 Apr 2022 14:37:44 +0300 Subject: [PATCH 037/110] Update clickhouse-test --- tests/clickhouse-test | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index b52a9f4f026..5f15ecf3dc6 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -1176,6 +1176,9 @@ def check_server_started(args): except TimeoutError: print("\nConnection timeout, will not retry") break + except Exception as e: + print("\nUexpected exception, will not retry: ", str(e)) + break print('\nAll connection tries failed') sys.stdout.flush() From b2905c5aed8575306e224fba4a834f7239be2811 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Wed, 13 Apr 2022 13:50:11 +0200 Subject: [PATCH 038/110] Update src/IO/ParallelReadBuffer.cpp Co-authored-by: tavplubix --- src/IO/ParallelReadBuffer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/IO/ParallelReadBuffer.cpp b/src/IO/ParallelReadBuffer.cpp index fac6c14c082..3db4a45778e 100644 --- a/src/IO/ParallelReadBuffer.cpp +++ b/src/IO/ParallelReadBuffer.cpp @@ -205,7 +205,7 @@ bool ParallelReadBuffer::nextImpl() void ParallelReadBuffer::readerThreadFunction(ReadWorkerPtr read_worker) { SCOPE_EXIT({ - if (--active_working_reader == 0) + if (active_working_reader.fetch_sub(1) == 1) active_working_reader.notify_all(); }); From ab80b94f4573bff6d90e55811458d049be964e50 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 13 Apr 2022 15:18:49 +0300 Subject: [PATCH 039/110] Update ThreadPool.cpp --- src/Common/ThreadPool.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/src/Common/ThreadPool.cpp b/src/Common/ThreadPool.cpp index 15ae64f17fb..c5f806c8a89 100644 --- a/src/Common/ThreadPool.cpp +++ b/src/Common/ThreadPool.cpp @@ -245,7 +245,7 @@ void ThreadPoolImpl::worker(typename std::list::iterator thread_ while (true) { /// This is inside the loop to also reset previous thread names set inside the jobs. - setThreadName("ThreadPoolIdle"); + setThreadName("ThreadPool"); Job job; bool need_shutdown = false; From 9ae047df1d7b35dce0ba8ca7af0c90ba029430a0 Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Wed, 13 Apr 2022 14:30:10 +0200 Subject: [PATCH 040/110] Update CachedReadBufferFromRemoteFS.cpp --- src/Disks/IO/CachedReadBufferFromRemoteFS.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp b/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp index 1cc819a7443..90a1d4ad43e 100644 --- a/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp +++ b/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp @@ -336,7 +336,7 @@ SeekableReadBufferPtr CachedReadBufferFromRemoteFS::getImplementationBuffer(File size_t seek_offset = file_offset_of_buffer_end - range.left; if (file_offset_of_buffer_end < range.left) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Failed invariant. Expected {} > {}", file_offset_of_buffer_end, range.left); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Invariant failed. Expected {} > {} (current offset > file segment's start offset)", file_offset_of_buffer_end, range.left); read_buffer_for_file_segment->seek(seek_offset, SEEK_SET); From 348cae0d165d00a467844779e5a2db952e440013 Mon Sep 17 00:00:00 2001 From: avogar Date: Wed, 13 Apr 2022 12:34:40 +0000 Subject: [PATCH 041/110] Fix possible segfault in schema inference for JSON formats --- src/Formats/JSONEachRowUtils.cpp | 7 +++++++ ...268_json_wrong_root_type_in_schema_inference.reference | 0 .../02268_json_wrong_root_type_in_schema_inference.sql | 8 ++++++++ 3 files changed, 15 insertions(+) create mode 100644 tests/queries/0_stateless/02268_json_wrong_root_type_in_schema_inference.reference create mode 100644 tests/queries/0_stateless/02268_json_wrong_root_type_in_schema_inference.sql diff --git a/src/Formats/JSONEachRowUtils.cpp b/src/Formats/JSONEachRowUtils.cpp index 5e1830655f9..3d0634e0259 100644 --- a/src/Formats/JSONEachRowUtils.cpp +++ b/src/Formats/JSONEachRowUtils.cpp @@ -255,6 +255,10 @@ struct JSONEachRowFieldsExtractor std::vector extract(const Element & element) { /// {..., "" : , ...} + + if (!element.isObject()) + throw Exception(ErrorCodes::INCORRECT_DATA, "Root JSON value is not an object"); + auto object = element.getObject(); std::vector fields; fields.reserve(object.size()); @@ -287,6 +291,9 @@ struct JSONCompactEachRowFieldsExtractor std::vector extract(const Element & element) { /// [..., , ...] 
+ if (!element.isArray()) + throw Exception(ErrorCodes::INCORRECT_DATA, "Root JSON value is not an array"); + auto array = element.getArray(); std::vector fields; fields.reserve(array.size()); diff --git a/tests/queries/0_stateless/02268_json_wrong_root_type_in_schema_inference.reference b/tests/queries/0_stateless/02268_json_wrong_root_type_in_schema_inference.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02268_json_wrong_root_type_in_schema_inference.sql b/tests/queries/0_stateless/02268_json_wrong_root_type_in_schema_inference.sql new file mode 100644 index 00000000000..2e66635a752 --- /dev/null +++ b/tests/queries/0_stateless/02268_json_wrong_root_type_in_schema_inference.sql @@ -0,0 +1,8 @@ +-- Tags: no-backward-compatibility-check:22.4.1.1 + +insert into function file('02268_data.jsonl', 'TSV') select 1; +select * from file('02268_data.jsonl'); --{serverError CANNOT_EXTRACT_TABLE_STRUCTURE} + +insert into function file('02268_data.jsonCompactEachRow', 'TSV') select 1; +select * from file('02268_data.jsonCompactEachRow'); --{serverError CANNOT_EXTRACT_TABLE_STRUCTURE} + From 6dbc138e76bb7a75aa2085d7ca69235264e768b6 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Wed, 13 Apr 2022 11:05:40 +0300 Subject: [PATCH 042/110] Remove duplicated parts removal from mutation. This commit reverts 701e2ffd795814fbd0083d9b24de236774d224cd ("Fix possible mutation stuck due to race with DROP_RANGE"), and also adds some clarification comments. Signed-off-by: Azat Khuzhin --- src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp index c5798aaefe5..c368aa3303d 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp @@ -357,9 +357,13 @@ void ReplicatedMergeTreeQueue::updateStateOnQueueEntryRemoval( current_parts.remove(*drop_range_part_name); } + /// During inserting to queue (insertUnlocked()) we remove part for + /// DROP_RANGE only for DROP PART but not for DROP PARTITION. 
virtual_parts.remove(*drop_range_part_name); - removeCoveredPartsFromMutations(*drop_range_part_name, /*remove_part = */ true, /*remove_covered_parts = */ false); + /// NOTE: we don't need to remove part/covered parts from mutations (removeCoveredPartsFromMutations()) here because: + /// - for DROP PART we have this during inserting to queue (see insertUnlocked()) + /// - for DROP PARTITION we have this in the loop above (when we adding parts to current_parts) } if (entry->type == LogEntry::DROP_RANGE) From dc460f76d8e75d6f0f3c5a8bc02db7304b784db7 Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Wed, 13 Apr 2022 15:58:15 +0000 Subject: [PATCH 043/110] Print progress bar on each ProfileEvents packet --- src/Client/ClientBase.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 929f0a48e2b..c9b7bb3a8ee 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -954,6 +954,9 @@ void ClientBase::onProfileEvents(Block & block) auto elapsed_time = profile_events.watch.elapsedMicroseconds(); progress_indication.updateThreadEventData(thread_times, elapsed_time); + if (need_render_progress) + progress_indication.writeProgress(); + if (profile_events.print) { if (profile_events.watch.elapsedMilliseconds() >= profile_events.delay_ms) From 66fdf35dfdbd5d2c67545c08ba44eb46525213c6 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Wed, 13 Apr 2022 18:01:22 +0200 Subject: [PATCH 044/110] remove outdated parts immediately on drop partition --- src/Storages/MergeTree/MergeTreeData.cpp | 21 +++++++++++++++++---- src/Storages/MergeTree/MergeTreeData.h | 4 +++- src/Storages/StorageMergeTree.cpp | 2 +- src/Storages/StorageReplicatedMergeTree.cpp | 10 +++++----- 4 files changed, 26 insertions(+), 11 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index e1e698d41e3..29b3083c38f 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -2878,8 +2878,7 @@ void MergeTreeData::removePartsFromWorkingSet( } MergeTreeData::DataPartsVector MergeTreeData::removePartsInRangeFromWorkingSet( - MergeTreeTransaction * txn, const MergeTreePartInfo & drop_range, - bool clear_without_timeout, DataPartsLock & lock) + MergeTreeTransaction * txn, const MergeTreePartInfo & drop_range, DataPartsLock & lock) { DataPartsVector parts_to_remove; @@ -2947,6 +2946,13 @@ MergeTreeData::DataPartsVector MergeTreeData::removePartsInRangeFromWorkingSet( parts_to_remove.emplace_back(part); } + bool clear_without_timeout = true; + /// We a going to remove active parts covered by drop_range without timeout. + /// Let's also reset timeout for inactive parts. 
+ auto inactive_parts_to_remove_immediately = getDataPartsVectorInPartitionForInternalUsage(DataPartState::Outdated, drop_range.partition_id, &lock); + for (auto & part : inactive_parts_to_remove_immediately) + part->remove_time.store(0, std::memory_order_relaxed); + /// FIXME refactor removePartsFromWorkingSet(...), do not remove parts twice removePartsFromWorkingSet(txn, parts_to_remove, clear_without_timeout, lock); @@ -3382,7 +3388,8 @@ MergeTreeData::DataPartsVector MergeTreeData::getVisibleDataPartsVectorInPartiti return getVisibleDataPartsVectorInPartition(local_context->getCurrentTransaction().get(), partition_id); } -MergeTreeData::DataPartsVector MergeTreeData::getVisibleDataPartsVectorInPartition(MergeTreeTransaction * txn, const String & partition_id, DataPartsLock * acquired_lock) const +MergeTreeData::DataPartsVector MergeTreeData::getVisibleDataPartsVectorInPartition( + MergeTreeTransaction * txn, const String & partition_id, DataPartsLock * acquired_lock) const { if (txn) { @@ -3398,7 +3405,13 @@ MergeTreeData::DataPartsVector MergeTreeData::getVisibleDataPartsVectorInPartiti return res; } - DataPartStateAndPartitionID state_with_partition{MergeTreeDataPartState::Active, partition_id}; + return getDataPartsVectorInPartitionForInternalUsage(MergeTreeDataPartState::Active, partition_id, acquired_lock); +} + +MergeTreeData::DataPartsVector MergeTreeData::getDataPartsVectorInPartitionForInternalUsage( + const MergeTreeData::DataPartState & state, const String & partition_id, DataPartsLock * acquired_lock) const +{ + DataPartStateAndPartitionID state_with_partition{state, partition_id}; auto lock = (acquired_lock) ? DataPartsLock() : lockParts(); return DataPartsVector( diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index 3a099c471c2..1601a6714d3 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -494,6 +494,8 @@ public: DataPartsVector getVisibleDataPartsVectorInPartition(ContextPtr local_context, const String & partition_id) const; DataPartsVector getVisibleDataPartsVectorInPartitions(ContextPtr local_context, const std::unordered_set & partition_ids) const; + DataPartsVector getDataPartsVectorInPartitionForInternalUsage(const DataPartState & state, const String & partition_id, DataPartsLock * acquired_lock = nullptr) const; + /// Returns the part with the given name and state or nullptr if no such part. DataPartPtr getPartIfExists(const String & part_name, const DataPartStates & valid_states); DataPartPtr getPartIfExists(const MergeTreePartInfo & part_info, const DataPartStates & valid_states); @@ -577,7 +579,7 @@ public: /// Removes all parts from the working set parts /// for which (partition_id = drop_range.partition_id && min_block >= drop_range.min_block && max_block <= drop_range.max_block). 
/// Used in REPLACE PARTITION command; - DataPartsVector removePartsInRangeFromWorkingSet(MergeTreeTransaction * txn, const MergeTreePartInfo & drop_range, bool clear_without_timeout, + DataPartsVector removePartsInRangeFromWorkingSet(MergeTreeTransaction * txn, const MergeTreePartInfo & drop_range, DataPartsLock & lock); /// Restores Outdated part and adds it to working set diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index b25b47ac772..e322d8785fa 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -1618,7 +1618,7 @@ void StorageMergeTree::replacePartitionFrom(const StoragePtr & source_table, con /// If it is REPLACE (not ATTACH), remove all parts which max_block_number less then min_block_number of the first new block if (replace) - removePartsInRangeFromWorkingSet(local_context->getCurrentTransaction().get(), drop_range, true, data_parts_lock); + removePartsInRangeFromWorkingSet(local_context->getCurrentTransaction().get(), drop_range, data_parts_lock); } PartLog::addNewParts(getContext(), dst_parts, watch.elapsed()); diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 0e61d8b6f81..243637d910b 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -1886,7 +1886,7 @@ void StorageReplicatedMergeTree::executeDropRange(const LogEntry & entry) DataPartsVector parts_to_remove; { auto data_parts_lock = lockParts(); - parts_to_remove = removePartsInRangeFromWorkingSet(NO_TRANSACTION_RAW, drop_range_info, true, data_parts_lock); + parts_to_remove = removePartsInRangeFromWorkingSet(NO_TRANSACTION_RAW, drop_range_info, data_parts_lock); if (parts_to_remove.empty()) { if (!drop_range_info.isFakeDropRangePart()) @@ -2019,7 +2019,7 @@ bool StorageReplicatedMergeTree::executeReplaceRange(const LogEntry & entry) if (parts_to_add.empty() && replace) { - parts_to_remove = removePartsInRangeFromWorkingSet(NO_TRANSACTION_RAW, drop_range, true, data_parts_lock); + parts_to_remove = removePartsInRangeFromWorkingSet(NO_TRANSACTION_RAW, drop_range, data_parts_lock); String parts_to_remove_str; for (const auto & part : parts_to_remove) { @@ -2257,7 +2257,7 @@ bool StorageReplicatedMergeTree::executeReplaceRange(const LogEntry & entry) transaction.commit(&data_parts_lock); if (replace) { - parts_to_remove = removePartsInRangeFromWorkingSet(NO_TRANSACTION_RAW, drop_range, true, data_parts_lock); + parts_to_remove = removePartsInRangeFromWorkingSet(NO_TRANSACTION_RAW, drop_range, data_parts_lock); String parts_to_remove_str; for (const auto & part : parts_to_remove) { @@ -6487,7 +6487,7 @@ void StorageReplicatedMergeTree::replacePartitionFrom( auto data_parts_lock = lockParts(); transaction.commit(&data_parts_lock); if (replace) - parts_to_remove = removePartsInRangeFromWorkingSet(NO_TRANSACTION_RAW, drop_range, true, data_parts_lock); + parts_to_remove = removePartsInRangeFromWorkingSet(NO_TRANSACTION_RAW, drop_range, data_parts_lock); } PartLog::addNewParts(getContext(), dst_parts, watch.elapsed()); @@ -6694,7 +6694,7 @@ void StorageReplicatedMergeTree::movePartitionToTable(const StoragePtr & dest_ta else zkutil::KeeperMultiException::check(code, ops, op_results); - parts_to_remove = removePartsInRangeFromWorkingSet(NO_TRANSACTION_RAW, drop_range, true, lock); + parts_to_remove = removePartsInRangeFromWorkingSet(NO_TRANSACTION_RAW, drop_range, lock); transaction.commit(&lock); } From 42726639f34567b22e24ae4bafb506c1ef3b808c 
Mon Sep 17 00:00:00 2001 From: avogar Date: Wed, 13 Apr 2022 19:27:38 +0000 Subject: [PATCH 045/110] Check ORC/Parquet/Arrow format magic bytes before loading file in memory --- .../Formats/Impl/ArrowBlockInputFormat.cpp | 2 +- .../Formats/Impl/ArrowBufferedStreams.cpp | 26 +++++++++++++++++-- .../Formats/Impl/ArrowBufferedStreams.h | 11 +++++++- .../Formats/Impl/ORCBlockInputFormat.cpp | 2 +- .../Formats/Impl/ParquetBlockInputFormat.cpp | 2 +- src/Storages/Hive/HiveFile.cpp | 4 +-- tests/integration/test_storage_s3/test.py | 15 +++++++++++ 7 files changed, 54 insertions(+), 8 deletions(-) diff --git a/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp b/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp index 792ebd09392..07331d82bb8 100644 --- a/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp @@ -111,7 +111,7 @@ static std::shared_ptr createStreamReader(ReadBuffer & static std::shared_ptr createFileReader(ReadBuffer & in, const FormatSettings & format_settings, std::atomic & is_stopped) { - auto arrow_file = asArrowFile(in, format_settings, is_stopped); + auto arrow_file = asArrowFile(in, format_settings, is_stopped, "Arrow", ARROW_MAGIC_BYTES); if (is_stopped) return nullptr; diff --git a/src/Processors/Formats/Impl/ArrowBufferedStreams.cpp b/src/Processors/Formats/Impl/ArrowBufferedStreams.cpp index 484a3a17f8f..8573a560d02 100644 --- a/src/Processors/Formats/Impl/ArrowBufferedStreams.cpp +++ b/src/Processors/Formats/Impl/ArrowBufferedStreams.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -22,6 +23,7 @@ namespace DB namespace ErrorCodes { extern const int UNKNOWN_FILE_SIZE; + extern const int INCORRECT_DATA; } ArrowBufferedOutputStream::ArrowBufferedOutputStream(WriteBuffer & out_) : out{out_}, is_open{true} @@ -139,7 +141,12 @@ arrow::Status ArrowInputStreamFromReadBuffer::Close() return arrow::Status(); } -std::shared_ptr asArrowFile(ReadBuffer & in, const FormatSettings & settings, std::atomic & is_cancelled) +std::shared_ptr asArrowFile( + ReadBuffer & in, + const FormatSettings & settings, + std::atomic & is_cancelled, + const std::string & format_name, + const std::string & magic_bytes) { if (auto * fd_in = dynamic_cast(&in)) { @@ -158,8 +165,23 @@ std::shared_ptr asArrowFile(ReadBuffer & in, const // fallback to loading the entire file in memory std::string file_data; { + PeekableReadBuffer buf(in); + std::string magic_bytes_from_data; + magic_bytes_from_data.resize(magic_bytes.size()); + bool read_magic_bytes = false; + try + { + PeekableReadBufferCheckpoint checkpoint(buf, true); + buf.readStrict(magic_bytes_from_data.data(), magic_bytes_from_data.size()); + read_magic_bytes = true; + } + catch (const Exception &) {} + + if (!read_magic_bytes || magic_bytes_from_data != magic_bytes) + throw Exception(ErrorCodes::INCORRECT_DATA, "Not a {} file", format_name); + WriteBufferFromString file_buffer(file_data); - copyData(in, file_buffer, is_cancelled); + copyData(buf, file_buffer, is_cancelled); } return std::make_shared(arrow::Buffer::FromString(std::move(file_data))); diff --git a/src/Processors/Formats/Impl/ArrowBufferedStreams.h b/src/Processors/Formats/Impl/ArrowBufferedStreams.h index e06eab04f1b..4ad0ecdf012 100644 --- a/src/Processors/Formats/Impl/ArrowBufferedStreams.h +++ b/src/Processors/Formats/Impl/ArrowBufferedStreams.h @@ -6,6 +6,10 @@ #include #include +#define ORC_MAGIC_BYTES "ORC" +#define PARQUET_MAGIC_BYTES "PAR1" +#define ARROW_MAGIC_BYTES "ARROW1" + 
namespace DB { @@ -86,7 +90,12 @@ private: ARROW_DISALLOW_COPY_AND_ASSIGN(ArrowInputStreamFromReadBuffer); }; -std::shared_ptr asArrowFile(ReadBuffer & in, const FormatSettings & settings, std::atomic & is_cancelled); +std::shared_ptr asArrowFile( + ReadBuffer & in, + const FormatSettings & settings, + std::atomic & is_cancelled, + const std::string & format_name, + const std::string & magic_bytes); } diff --git a/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp b/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp index 333129aee81..1531c0d2794 100644 --- a/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp @@ -116,7 +116,7 @@ static void getFileReaderAndSchema( const FormatSettings & format_settings, std::atomic & is_stopped) { - auto arrow_file = asArrowFile(in, format_settings, is_stopped); + auto arrow_file = asArrowFile(in, format_settings, is_stopped, "ORC", ORC_MAGIC_BYTES); if (is_stopped) return; diff --git a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp index af16d30bcfe..86987c665e0 100644 --- a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp @@ -117,7 +117,7 @@ static void getFileReaderAndSchema( const FormatSettings & format_settings, std::atomic & is_stopped) { - auto arrow_file = asArrowFile(in, format_settings, is_stopped); + auto arrow_file = asArrowFile(in, format_settings, is_stopped, "Parquet", PARQUET_MAGIC_BYTES); if (is_stopped) return; THROW_ARROW_NOT_OK(parquet::arrow::OpenFile(std::move(arrow_file), arrow::default_memory_pool(), &file_reader)); diff --git a/src/Storages/Hive/HiveFile.cpp b/src/Storages/Hive/HiveFile.cpp index 02c92770274..57acbdd577b 100644 --- a/src/Storages/Hive/HiveFile.cpp +++ b/src/Storages/Hive/HiveFile.cpp @@ -150,7 +150,7 @@ void HiveORCFile::prepareReader() in = std::make_unique(namenode_url, path, getContext()->getGlobalContext()->getConfigRef()); auto format_settings = getFormatSettings(getContext()); std::atomic is_stopped{0}; - auto result = arrow::adapters::orc::ORCFileReader::Open(asArrowFile(*in, format_settings, is_stopped), arrow::default_memory_pool()); + auto result = arrow::adapters::orc::ORCFileReader::Open(asArrowFile(*in, format_settings, is_stopped, "ORC", ORC_MAGIC_BYTES), arrow::default_memory_pool()); THROW_ARROW_NOT_OK(result.status()); reader = std::move(result).ValueOrDie(); } @@ -270,7 +270,7 @@ void HiveParquetFile::prepareReader() in = std::make_unique(namenode_url, path, getContext()->getGlobalContext()->getConfigRef()); auto format_settings = getFormatSettings(getContext()); std::atomic is_stopped{0}; - THROW_ARROW_NOT_OK(parquet::arrow::OpenFile(asArrowFile(*in, format_settings, is_stopped), arrow::default_memory_pool(), &reader)); + THROW_ARROW_NOT_OK(parquet::arrow::OpenFile(asArrowFile(*in, format_settings, is_stopped, "Parquet", PARQUET_MAGIC_BYTES), arrow::default_memory_pool(), &reader)); } void HiveParquetFile::loadSplitMinMaxIndexesImpl() diff --git a/tests/integration/test_storage_s3/test.py b/tests/integration/test_storage_s3/test.py index 18cf7cc67ab..9b0cc3cdea8 100644 --- a/tests/integration/test_storage_s3/test.py +++ b/tests/integration/test_storage_s3/test.py @@ -1428,3 +1428,18 @@ def test_parallel_reading_with_memory_limit(started_cluster): # Check that server didn't crash result = instance.query("select 1") assert int(result) == 1 + + +def test_wrong_format_usage(started_cluster): + bucket = 
started_cluster.minio_bucket + instance = started_cluster.instances["dummy"] + + instance.query( + f"insert into function s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_wrong_format.native') select * from numbers(10)" + ) + + result = instance.query_and_get_error( + f"desc s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_wrong_format.native', 'Parquet') settings input_format_allow_seeks=0, max_memory_usage=1000" + ) + + assert "Not a Parquet file" in result From fd23c42ea743363f161c85dfb566b82f09b3dd85 Mon Sep 17 00:00:00 2001 From: tavplubix Date: Wed, 13 Apr 2022 22:33:55 +0300 Subject: [PATCH 046/110] Update StorageReplicatedMergeTree.cpp --- src/Storages/StorageReplicatedMergeTree.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 243637d910b..db1de14f6a6 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -1812,7 +1812,7 @@ bool StorageReplicatedMergeTree::executeFetch(LogEntry & entry, bool need_to_che if (!parts_for_merge.empty() && replica.empty()) { - LOG_INFO(log, " {}. Will fetch merged part instead.", entry.new_part_name); + LOG_INFO(log, "No active replica has part {}. Will fetch merged part instead.", entry.new_part_name); /// We should enqueue it for check, because merged part may never appear if source part is lost enqueuePartForCheck(entry.new_part_name); return false; From b2cd23658a30b7d11ec17feacaffc83d7863ca1d Mon Sep 17 00:00:00 2001 From: tavplubix Date: Wed, 13 Apr 2022 22:45:34 +0300 Subject: [PATCH 047/110] Revert "Fix crash in ParallelReadBuffer" --- src/IO/ParallelReadBuffer.cpp | 6 +++++- tests/integration/test_storage_s3/test.py | 21 --------------------- 2 files changed, 5 insertions(+), 22 deletions(-) diff --git a/src/IO/ParallelReadBuffer.cpp b/src/IO/ParallelReadBuffer.cpp index 79a16347094..f036d6a08c8 100644 --- a/src/IO/ParallelReadBuffer.cpp +++ b/src/IO/ParallelReadBuffer.cpp @@ -33,7 +33,6 @@ bool ParallelReadBuffer::addReaderToPool(std::unique_lock & /*buffer auto worker = read_workers.emplace_back(std::make_shared(std::move(reader))); - ++active_working_reader; schedule([this, worker = std::move(worker)]() mutable { readerThreadFunction(std::move(worker)); }); return true; @@ -204,6 +203,11 @@ bool ParallelReadBuffer::nextImpl() void ParallelReadBuffer::readerThreadFunction(ReadWorkerPtr read_worker) { + { + std::lock_guard lock{mutex}; + ++active_working_reader; + } + SCOPE_EXIT({ std::lock_guard lock{mutex}; --active_working_reader; diff --git a/tests/integration/test_storage_s3/test.py b/tests/integration/test_storage_s3/test.py index 18cf7cc67ab..e32ddd2782b 100644 --- a/tests/integration/test_storage_s3/test.py +++ b/tests/integration/test_storage_s3/test.py @@ -1407,24 +1407,3 @@ def test_insert_select_schema_inference(started_cluster): f"select * from s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_insert_select.native')" ) assert int(result) == 1 - - -def test_parallel_reading_with_memory_limit(started_cluster): - bucket = started_cluster.minio_bucket - instance = started_cluster.instances["dummy"] - - instance.query( - f"insert into function s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_memory_limit.native') select * from numbers(100000)" - ) - - result = instance.query_and_get_error( - f"select * from 
url('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_memory_limit.native') settings max_memory_usage=10000" - ) - - assert "Memory limit (for query) exceeded" in result - - sleep(5) - - # Check that server didn't crash - result = instance.query("select 1") - assert int(result) == 1 From 6e89fc4542bc08e269e6fe3fcc082da83b48b5ed Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 13 Apr 2022 23:29:53 +0300 Subject: [PATCH 048/110] Update clickhouse-keeper.md --- docs/en/operations/clickhouse-keeper.md | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/docs/en/operations/clickhouse-keeper.md b/docs/en/operations/clickhouse-keeper.md index a8ca2079070..26d61dabaf9 100644 --- a/docs/en/operations/clickhouse-keeper.md +++ b/docs/en/operations/clickhouse-keeper.md @@ -3,13 +3,10 @@ toc_priority: 66 toc_title: ClickHouse Keeper --- -# [pre-production] ClickHouse Keeper {#clickHouse-keeper} +# ClickHouse Keeper {#clickHouse-keeper} ClickHouse server uses [ZooKeeper](https://zookeeper.apache.org/) coordination system for data [replication](../engines/table-engines/mergetree-family/replication.md) and [distributed DDL](../sql-reference/distributed-ddl.md) queries execution. ClickHouse Keeper is an alternative coordination system compatible with ZooKeeper. -!!! warning "Warning" - This feature is currently in the pre-production stage. We test it in our CI and on small internal installations. - ## Implementation details {#implementation-details} ZooKeeper is one of the first well-known open-source coordination systems. It's implemented in Java, has quite a simple and powerful data model. ZooKeeper's coordination algorithm called ZAB (ZooKeeper Atomic Broadcast) doesn't provide linearizability guarantees for reads, because each ZooKeeper node serves reads locally. Unlike ZooKeeper ClickHouse Keeper is written in C++ and uses [RAFT algorithm](https://raft.github.io/) [implementation](https://github.com/eBay/NuRaft). This algorithm allows to have linearizability for reads and writes, has several open-source implementations in different languages. 
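Editorial aside to the Keeper section above: a standalone Keeper participant is normally declared through a `<keeper_server>` block in the server (or keeper) configuration. The sketch below is illustrative only — the element names, paths, and ports reflect common ClickHouse Keeper setups and should be checked against the version-specific documentation; it is not part of this patch.

```xml
<keeper_server>
    <tcp_port>9181</tcp_port>                <!-- client (ZooKeeper-compatible) port -->
    <server_id>1</server_id>                 <!-- unique id of this Raft participant -->
    <log_storage_path>/var/lib/clickhouse/coordination/log</log_storage_path>
    <snapshot_storage_path>/var/lib/clickhouse/coordination/snapshots</snapshot_storage_path>
    <raft_configuration>
        <server>
            <id>1</id>
            <hostname>localhost</hostname>
            <port>9234</port>                <!-- internal Raft communication port -->
        </server>
    </raft_configuration>
</keeper_server>
```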
From ca14a29dbeb6a01303986b6fb5a127db66cc3cf4 Mon Sep 17 00:00:00 2001 From: avogar Date: Wed, 13 Apr 2022 20:39:12 +0000 Subject: [PATCH 049/110] Revert reverting "Fix crash in ParallelReadBuffer" --- src/IO/ParallelReadBuffer.cpp | 6 +----- tests/integration/test_storage_s3/test.py | 21 +++++++++++++++++++++ 2 files changed, 22 insertions(+), 5 deletions(-) diff --git a/src/IO/ParallelReadBuffer.cpp b/src/IO/ParallelReadBuffer.cpp index f036d6a08c8..79a16347094 100644 --- a/src/IO/ParallelReadBuffer.cpp +++ b/src/IO/ParallelReadBuffer.cpp @@ -33,6 +33,7 @@ bool ParallelReadBuffer::addReaderToPool(std::unique_lock & /*buffer auto worker = read_workers.emplace_back(std::make_shared(std::move(reader))); + ++active_working_reader; schedule([this, worker = std::move(worker)]() mutable { readerThreadFunction(std::move(worker)); }); return true; @@ -203,11 +204,6 @@ bool ParallelReadBuffer::nextImpl() void ParallelReadBuffer::readerThreadFunction(ReadWorkerPtr read_worker) { - { - std::lock_guard lock{mutex}; - ++active_working_reader; - } - SCOPE_EXIT({ std::lock_guard lock{mutex}; --active_working_reader; diff --git a/tests/integration/test_storage_s3/test.py b/tests/integration/test_storage_s3/test.py index e32ddd2782b..87f97e7454a 100644 --- a/tests/integration/test_storage_s3/test.py +++ b/tests/integration/test_storage_s3/test.py @@ -1407,3 +1407,24 @@ def test_insert_select_schema_inference(started_cluster): f"select * from s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_insert_select.native')" ) assert int(result) == 1 + + +def test_parallel_reading_with_memory_limit(started_cluster): + bucket = started_cluster.minio_bucket + instance = started_cluster.instances["dummy"] + + instance.query( + f"insert into function s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_memory_limit.native') select * from numbers(1000000)" + ) + + result = instance.query_and_get_error( + f"select * from url('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_memory_limit.native') settings max_memory_usage=1000" + ) + + assert "Memory limit (for query) exceeded" in result + + time.sleep(5) + + # Check that server didn't crash + result = instance.query("select 1") + assert int(result) == 1 From 01a1babd3b4966678144fe045f64b779f44307fb Mon Sep 17 00:00:00 2001 From: tavplubix Date: Wed, 13 Apr 2022 23:56:34 +0300 Subject: [PATCH 050/110] Update ci_config.py --- tests/ci/ci_config.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index b4f13817896..a4847f33d45 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -282,6 +282,7 @@ CI_CONFIG = { }, "Stateless tests (release, s3 storage, actions)": { "required_build": "package_release", + "force_tests": True, }, "Stress test (address, actions)": { "required_build": "package_asan", From 0595b5c22b98324089633d0d75ad59ebe78ee768 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Wed, 13 Apr 2022 21:50:57 +0000 Subject: [PATCH 051/110] fix reading of empty arrays in reverse order --- .../CompressedReadBufferFromFile.cpp | 2 +- .../02267_empty_arrays_read_reverse.reference | 1 + .../02267_empty_arrays_read_reverse.sql | 22 +++++++++++++++++++ 3 files changed, 24 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/02267_empty_arrays_read_reverse.reference create mode 100644 tests/queries/0_stateless/02267_empty_arrays_read_reverse.sql diff --git a/src/Compression/CompressedReadBufferFromFile.cpp 
b/src/Compression/CompressedReadBufferFromFile.cpp index 9efb3c92cde..a959da2c3ae 100644 --- a/src/Compression/CompressedReadBufferFromFile.cpp +++ b/src/Compression/CompressedReadBufferFromFile.cpp @@ -62,7 +62,7 @@ void CompressedReadBufferFromFile::seek(size_t offset_in_compressed_file, size_t { /// Nothing to do if we already at required position if (!size_compressed && static_cast(file_in.getPosition()) == offset_in_compressed_file && /// correct position in compressed file - (offset() == offset_in_decompressed_block /// correct position in buffer or + ((!buffer().empty() && offset() == offset_in_decompressed_block) /// correct position in buffer or || nextimpl_working_buffer_offset == offset_in_decompressed_block)) /// we will move our position to correct one return; diff --git a/tests/queries/0_stateless/02267_empty_arrays_read_reverse.reference b/tests/queries/0_stateless/02267_empty_arrays_read_reverse.reference new file mode 100644 index 00000000000..cc0cc5714d9 --- /dev/null +++ b/tests/queries/0_stateless/02267_empty_arrays_read_reverse.reference @@ -0,0 +1 @@ +['x'] 0 ['1','2','3','4','5','6'] diff --git a/tests/queries/0_stateless/02267_empty_arrays_read_reverse.sql b/tests/queries/0_stateless/02267_empty_arrays_read_reverse.sql new file mode 100644 index 00000000000..7c1cf47c540 --- /dev/null +++ b/tests/queries/0_stateless/02267_empty_arrays_read_reverse.sql @@ -0,0 +1,22 @@ +DROP TABLE IF EXISTS t_02267; + +CREATE TABLE t_02267 +( + a Array(String), + b UInt32, + c Array(String) +) +ENGINE = MergeTree +ORDER BY b +SETTINGS index_granularity = 500; + +INSERT INTO t_02267 (b, a, c) SELECT 0, ['x'], ['1','2','3','4','5','6'] FROM numbers(1) ; +INSERT INTO t_02267 (b, a, c) SELECT 1, [], ['1','2','3','4','5','6'] FROM numbers(300000); + +OPTIMIZE TABLE t_02267 FINAL; + +SELECT * FROM t_02267 WHERE hasAll(a, ['x']) +ORDER BY b DESC +SETTINGS max_threads=1, max_block_size=1000; + +DROP TABLE IF EXISTS t_02267; From 4cce325941d7bc5df35a6ee7d5ec2b2449c22715 Mon Sep 17 00:00:00 2001 From: Tyler Hannan Date: Thu, 14 Apr 2022 00:25:14 +0200 Subject: [PATCH 052/110] Add Tyler Hannan to contributors --- tests/ci/run_check.py | 1 + tests/ci/workflow_approve_rerun_lambda/app.py | 1 + 2 files changed, 2 insertions(+) diff --git a/tests/ci/run_check.py b/tests/ci/run_check.py index d76e28f96f9..183e47bbc87 100644 --- a/tests/ci/run_check.py +++ b/tests/ci/run_check.py @@ -80,6 +80,7 @@ TRUSTED_CONTRIBUTORS = { "thomoco", # ClickHouse "BoloniniD", # Seasoned contributor, HSE "tonickkozlov", # Cloudflare + "tylerhannan", # ClickHouse Employee ] } diff --git a/tests/ci/workflow_approve_rerun_lambda/app.py b/tests/ci/workflow_approve_rerun_lambda/app.py index b650d1651fe..7825bad141f 100644 --- a/tests/ci/workflow_approve_rerun_lambda/app.py +++ b/tests/ci/workflow_approve_rerun_lambda/app.py @@ -121,6 +121,7 @@ TRUSTED_CONTRIBUTORS = { "YiuRULE", "zlobober", # Developer of YT "BoloniniD", # Seasoned contributor, HSE + "tylerhannan", # ClickHouse Employee ] } From d2a7c4541edfa8e58eb8e0c84596488797b55e7b Mon Sep 17 00:00:00 2001 From: Tyler Hannan Date: Thu, 14 Apr 2022 00:36:38 +0200 Subject: [PATCH 053/110] whitespace --- tests/ci/workflow_approve_rerun_lambda/app.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ci/workflow_approve_rerun_lambda/app.py b/tests/ci/workflow_approve_rerun_lambda/app.py index 7825bad141f..79e6e300bca 100644 --- a/tests/ci/workflow_approve_rerun_lambda/app.py +++ b/tests/ci/workflow_approve_rerun_lambda/app.py @@ -121,7 +121,7 @@ 
TRUSTED_CONTRIBUTORS = { "YiuRULE", "zlobober", # Developer of YT "BoloniniD", # Seasoned contributor, HSE - "tylerhannan", # ClickHouse Employee + "tylerhannan", # ClickHouse Employee ] } From 281ce4bfc746c106be4334a60584ae8a0d8118d2 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 14 Apr 2022 03:04:34 +0300 Subject: [PATCH 054/110] Update http.md --- docs/en/interfaces/http.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/en/interfaces/http.md b/docs/en/interfaces/http.md index d72fb4d6f17..ca2a165bbd8 100644 --- a/docs/en/interfaces/http.md +++ b/docs/en/interfaces/http.md @@ -5,11 +5,10 @@ toc_title: HTTP Interface # HTTP Interface {#http-interface} -The HTTP interface lets you use ClickHouse on any platform from any programming language. We use it for working from Java and Perl, as well as shell scripts. In other departments, the HTTP interface is used from Perl, Python, and Go. The HTTP interface is more limited than the native interface, but it has better compatibility. +The HTTP interface lets you use ClickHouse on any platform from any programming language in a form of REST API. The HTTP interface is more limited than the native interface, but it has better language support. By default, `clickhouse-server` listens for HTTP on port 8123 (this can be changed in the config). - -Sometimes, `curl` command is not available on user operating systems. On Ubuntu or Debian, run `sudo apt install curl`. Please refer this [documentation](https://curl.se/download.html) to install it before running the examples. +HTTPS can be enabled as well with port 8443 by default. If you make a `GET /` request without parameters, it returns 200 response code and the string which defined in [http_server_default_response](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-http_server_default_response) default value “Ok.” (with a line feed at the end) @@ -18,11 +17,12 @@ $ curl 'http://localhost:8123/' Ok. ``` +Sometimes, `curl` command is not available on user operating systems. On Ubuntu or Debian, run `sudo apt install curl`. Please refer this [documentation](https://curl.se/download.html) to install it before running the examples. + Web UI can be accessed here: `http://localhost:8123/play`. ![Web UI](../images/play.png) - In health-check scripts use `GET /ping` request. This handler always returns “Ok.” (with a line feed at the end). Available from version 18.12.13. See also `/replicas_status` to check replica's delay. ``` bash @@ -32,7 +32,7 @@ $ curl 'http://localhost:8123/replicas_status' Ok. ``` -Send the request as a URL ‘query’ parameter, or as a POST. Or send the beginning of the query in the ‘query’ parameter, and the rest in the POST (we’ll explain later why this is necessary). The size of the URL is limited to 16 KB, so keep this in mind when sending large queries. +Send the request as a URL ‘query’ parameter, or as a POST. Or send the beginning of the query in the ‘query’ parameter, and the rest in the POST (we’ll explain later why this is necessary). The size of the URL is limited to 1 MiB by default, this can be changed with the `http_max_uri_size` setting. If successful, you receive the 200 response code and the result in the response body. If an error occurs, you receive the 500 response code and an error description text in the response body. 
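As a quick editorial illustration of the two request styles described above (a minimal sketch against a default local server on port 8123 with the default user; the URLs and queries are examples only, not part of this patch):

``` bash
# Whole query passed in the URL 'query' parameter
$ curl 'http://localhost:8123/?query=SELECT%201'
1

# Query (or its remainder) sent in the POST body
$ curl 'http://localhost:8123/' --data-binary 'SELECT 1'
1
```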
From 427c73e7a52655246da99aa0ef981b1101e23b95 Mon Sep 17 00:00:00 2001 From: jewisliu Date: Wed, 13 Apr 2022 10:55:45 +0800 Subject: [PATCH 055/110] fix Enum overflow --- src/DataTypes/DataTypeEnum.cpp | 13 +++++++++++-- tests/queries/0_stateless/00757_enum_defaults.sql | 4 ++++ 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/src/DataTypes/DataTypeEnum.cpp b/src/DataTypes/DataTypeEnum.cpp index e292897e451..9d1424d862b 100644 --- a/src/DataTypes/DataTypeEnum.cpp +++ b/src/DataTypes/DataTypeEnum.cpp @@ -193,19 +193,28 @@ static void checkASTStructure(const ASTPtr & child) static void autoAssignNumberForEnum(const ASTPtr & arguments) { - Int16 child_count = 1; + UInt64 literal_child_count = 0; + UInt64 func_child_count = 0; ASTs assign_number_child; assign_number_child.reserve(arguments->children.size()); for (const ASTPtr & child : arguments->children) { if (child->as()) { - ASTPtr func = makeASTFunction("equals", child, std::make_shared(child_count++)); + ASTPtr func = makeASTFunction("equals", child, std::make_shared(++literal_child_count)); assign_number_child.emplace_back(func); } else + { + ++func_child_count; assign_number_child.emplace_back(child); + } } + + if (func_child_count > 0 && literal_child_count > 0) + throw Exception("ALL Elements of Enum data type must be of form: 'name' = number or 'name', where name is string literal and number is an integer", + ErrorCodes::UNEXPECTED_AST_STRUCTURE); + arguments->children = assign_number_child; } diff --git a/tests/queries/0_stateless/00757_enum_defaults.sql b/tests/queries/0_stateless/00757_enum_defaults.sql index 01f795b31f9..769579ffc0b 100644 --- a/tests/queries/0_stateless/00757_enum_defaults.sql +++ b/tests/queries/0_stateless/00757_enum_defaults.sql @@ -2,7 +2,11 @@ select os_name, count() from (SELECT CAST('iphone' AS Enum8('iphone' = 1, 'andro select toNullable(os_name) AS os_name, count() from (SELECT CAST('iphone' AS Enum8('iphone' = 1, 'android' = 2)) AS os_name) group by os_name WITH TOTALS; DROP TABLE IF EXISTS auto_assgin_enum; +DROP TABLE IF EXISTS auto_assgin_enum1; + CREATE TABLE auto_assgin_enum (x enum('a', 'b')) ENGINE=MergeTree() order by x; +CREATE TABLE auto_assgin_enum1 (x enum('a' = 1, 'b')) ENGINE=MergeTree() order by x; -- { serverError 223 } INSERT INTO auto_assgin_enum VALUES('a'), ('b'); select * from auto_assgin_enum; + DROP TABLE auto_assgin_enum; From 323cf6c5bf25cbaa350d0e1a0a194642e6790485 Mon Sep 17 00:00:00 2001 From: "Mikhail f. 
Shiryaev" Date: Thu, 14 Apr 2022 13:01:25 +0200 Subject: [PATCH 056/110] Merge TRUSTED_CONTRIBUTORS in lambda and import in check --- tests/ci/run_check.py | 74 ++----------------- .../workflow_approve_rerun_lambda/__init__.py | 1 + tests/ci/workflow_approve_rerun_lambda/app.py | 5 ++ 3 files changed, 14 insertions(+), 66 deletions(-) create mode 100644 tests/ci/workflow_approve_rerun_lambda/__init__.py diff --git a/tests/ci/run_check.py b/tests/ci/run_check.py index 183e47bbc87..69df4a87563 100644 --- a/tests/ci/run_check.py +++ b/tests/ci/run_check.py @@ -5,10 +5,12 @@ import re from typing import Tuple from github import Github -from env_helper import GITHUB_RUN_URL, GITHUB_REPOSITORY, GITHUB_SERVER_URL -from pr_info import PRInfo -from get_robot_token import get_best_robot_token + from commit_status_helper import get_commit, post_labels, remove_labels +from env_helper import GITHUB_RUN_URL, GITHUB_REPOSITORY, GITHUB_SERVER_URL +from get_robot_token import get_best_robot_token +from pr_info import PRInfo +from workflow_approve_rerun_lambda.app import TRUSTED_CONTRIBUTORS NAME = "Run Check (actions)" @@ -24,70 +26,12 @@ DO_NOT_TEST_LABEL = "do not test" FORCE_TESTS_LABEL = "force tests" SUBMODULE_CHANGED_LABEL = "submodule changed" -# Individual trusted contirbutors who are not in any trusted organization. -# Can be changed in runtime: we will append users that we learned to be in -# a trusted org, to save GitHub API calls. -TRUSTED_CONTRIBUTORS = { - e.lower() - for e in [ - "achimbab", - "adevyatova ", # DOCSUP - "Algunenano", # Raúl Marín, Tinybird - "amosbird", - "AnaUvarova", # DOCSUP - "anauvarova", # technical writer, Yandex - "annvsh", # technical writer, Yandex - "atereh", # DOCSUP - "azat", - "bharatnc", # Newbie, but already with many contributions. 
- "bobrik", # Seasoned contributor, CloudFlare - "BohuTANG", - "codyrobert", # Flickerbox engineer - "cwurm", # Employee - "damozhaeva", # DOCSUP - "den-crane", - "flickerbox-tom", # Flickerbox - "gyuton", # technical writer, Yandex - "hagen1778", # Roman Khavronenko, seasoned contributor - "hczhcz", - "hexiaoting", # Seasoned contributor - "ildus", # adjust, ex-pgpro - "javisantana", # a Spanish ClickHouse enthusiast, ex-Carto - "ka1bi4", # DOCSUP - "kirillikoff", # DOCSUP - "kitaisreal", # Seasoned contributor - "kreuzerkrieg", - "lehasm", # DOCSUP - "michon470", # DOCSUP - "MyroTk", # Tester in Altinity - "myrrc", # Michael Kot, Altinity - "nikvas0", - "nvartolomei", - "olgarev", # DOCSUP - "otrazhenia", # Yandex docs contractor - "pdv-ru", # DOCSUP - "podshumok", # cmake expert from QRator Labs - "s-mx", # Maxim Sabyanin, former employee, present contributor - "sevirov", # technical writer, Yandex - "spongedu", # Seasoned contributor - "taiyang-li", - "ucasFL", # Amos Bird's friend - "vdimir", # Employee - "vzakaznikov", - "YiuRULE", - "zlobober", # Developer of YT - "ilejn", # Arenadata, responsible for Kerberized Kafka - "thomoco", # ClickHouse - "BoloniniD", # Seasoned contributor, HSE - "tonickkozlov", # Cloudflare - "tylerhannan", # ClickHouse Employee - ] -} MAP_CATEGORY_TO_LABEL = { "New Feature": "pr-feature", "Bug Fix": "pr-bugfix", - "Bug Fix (user-visible misbehaviour in official stable or prestable release)": "pr-bugfix", + "Bug Fix (user-visible misbehaviour in official " + "stable or prestable release)": "pr-bugfix", "Improvement": "pr-improvement", "Performance Improvement": "pr-performance", "Backward Incompatible Change": "pr-backward-incompatible", @@ -257,9 +201,7 @@ if __name__ == "__main__": elif SUBMODULE_CHANGED_LABEL in pr_info.labels: pr_labels_to_remove.append(SUBMODULE_CHANGED_LABEL) - print( - "change labels: add {}, remove {}".format(pr_labels_to_add, pr_labels_to_remove) - ) + print(f"change labels: add {pr_labels_to_add}, remove {pr_labels_to_remove}") if pr_labels_to_add: post_labels(gh, pr_info, pr_labels_to_add) diff --git a/tests/ci/workflow_approve_rerun_lambda/__init__.py b/tests/ci/workflow_approve_rerun_lambda/__init__.py new file mode 100644 index 00000000000..4265cc3e6c1 --- /dev/null +++ b/tests/ci/workflow_approve_rerun_lambda/__init__.py @@ -0,0 +1 @@ +#!/usr/bin/env python diff --git a/tests/ci/workflow_approve_rerun_lambda/app.py b/tests/ci/workflow_approve_rerun_lambda/app.py index 79e6e300bca..98a686fe3f9 100644 --- a/tests/ci/workflow_approve_rerun_lambda/app.py +++ b/tests/ci/workflow_approve_rerun_lambda/app.py @@ -89,9 +89,11 @@ TRUSTED_CONTRIBUTORS = { "bharatnc", # Newbie, but already with many contributions. 
"bobrik", # Seasoned contributor, CloudFlare "BohuTANG", + "codyrobert", # Flickerbox engineer "cwurm", # Employee "damozhaeva", # DOCSUP "den-crane", + "flickerbox-tom", # Flickerbox "gyuton", # DOCSUP "hagen1778", # Roman Khavronenko, seasoned contributor "hczhcz", @@ -120,7 +122,10 @@ TRUSTED_CONTRIBUTORS = { "vzakaznikov", "YiuRULE", "zlobober", # Developer of YT + "ilejn", # Arenadata, responsible for Kerberized Kafka + "thomoco", # ClickHouse "BoloniniD", # Seasoned contributor, HSE + "tonickkozlov", # Cloudflare "tylerhannan", # ClickHouse Employee ] } From 5f31185125ac96b67dffa0911cbe7a09e5da53f8 Mon Sep 17 00:00:00 2001 From: tavplubix Date: Thu, 14 Apr 2022 14:35:46 +0300 Subject: [PATCH 057/110] Update stress --- docker/test/stress/stress | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docker/test/stress/stress b/docker/test/stress/stress index e42587e9a4d..d78de84f60d 100755 --- a/docker/test/stress/stress +++ b/docker/test/stress/stress @@ -83,15 +83,15 @@ def make_query_command(query): def prepare_for_hung_check(drop_databases): # FIXME this function should not exist, but... - # ThreadFuzzer significantly slows down server and causes false-positive hung check failures - call_with_retry("clickhouse client -q 'SYSTEM STOP THREAD FUZZER'") - # We attach gdb to clickhouse-server before running tests # to print stacktraces of all crashes even if clickhouse cannot print it for some reason. # However, it obstruct checking for hung queries. logging.info("Will terminate gdb (if any)") call_with_retry("kill -TERM $(pidof gdb)") + # ThreadFuzzer significantly slows down server and causes false-positive hung check failures + call_with_retry("clickhouse client -q 'SYSTEM STOP THREAD FUZZER'") + call_with_retry(make_query_command('SELECT 1 FORMAT Null')) # Some tests execute SYSTEM STOP MERGES or similar queries. 
From 3e42a1711dfec2a77dfe92d945b97e52526745b2 Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 14 Apr 2022 13:17:04 +0200 Subject: [PATCH 058/110] Fix --- src/Common/FileSegment.cpp | 30 ++++++++++++++----- src/Common/FileSegment.h | 1 + src/Disks/IO/CachedReadBufferFromRemoteFS.cpp | 3 ++ 3 files changed, 26 insertions(+), 8 deletions(-) diff --git a/src/Common/FileSegment.cpp b/src/Common/FileSegment.cpp index 50a1ecb4a6c..3729b87e823 100644 --- a/src/Common/FileSegment.cpp +++ b/src/Common/FileSegment.cpp @@ -163,6 +163,11 @@ bool FileSegment::isDownloader() const return getCallerId() == downloader_id; } +bool FileSegment::isDownloaderImpl(std::lock_guard & /* segment+_lock */) const +{ + return getCallerId() == downloader_id; +} + FileSegment::RemoteFileReaderPtr FileSegment::getRemoteFileReader() { if (!isDownloader()) @@ -397,6 +402,9 @@ bool FileSegment::reserve(size_t size) void FileSegment::setDownloaded(std::lock_guard & /* segment_lock */) { + if (is_downloaded) + return; + download_state = State::DOWNLOADED; is_downloaded = true; downloader_id.clear(); @@ -426,8 +434,7 @@ void FileSegment::completeBatchAndResetDownloader() { std::lock_guard segment_lock(mutex); - bool is_downloader = downloader_id == getCallerId(); - if (!is_downloader) + if (!isDownloaderImpl(segment_lock)) { cv.notify_all(); throw Exception( @@ -448,7 +455,7 @@ void FileSegment::complete(State state) std::lock_guard cache_lock(cache->mutex); std::lock_guard segment_lock(mutex); - bool is_downloader = downloader_id == getCallerId(); + bool is_downloader = isDownloaderImpl(segment_lock); if (!is_downloader) { cv.notify_all(); @@ -465,6 +472,9 @@ void FileSegment::complete(State state) "Cannot complete file segment with state: {}", stateToString(state)); } + if (state == State::DOWNLOADED) + setDownloaded(segment_lock); + download_state = state; assertNotDetached(); @@ -475,7 +485,7 @@ void FileSegment::complete(State state) } catch (...) { - if (!downloader_id.empty() && downloader_id == getCallerIdImpl()) + if (!downloader_id.empty() && is_downloader) downloader_id.clear(); cv.notify_all(); @@ -492,8 +502,12 @@ void FileSegment::complete(std::lock_guard & cache_lock) if (download_state == State::SKIP_CACHE || detached) return; - if (download_state != State::DOWNLOADED && getDownloadedSize(segment_lock) == range().size()) + if (isDownloaderImpl(segment_lock) + && download_state != State::DOWNLOADED + && getDownloadedSize(segment_lock) == range().size()) + { setDownloaded(segment_lock); + } assertNotDetached(); @@ -502,7 +516,7 @@ void FileSegment::complete(std::lock_guard & cache_lock) /// Segment state can be changed from DOWNLOADING or EMPTY only if the caller is the /// downloader or the only owner of the segment. - bool can_update_segment_state = downloader_id == getCallerIdImpl() + bool can_update_segment_state = isDownloaderImpl(segment_lock) || cache->isLastFileSegmentHolder(key(), offset(), cache_lock, segment_lock); if (can_update_segment_state) @@ -515,7 +529,7 @@ void FileSegment::complete(std::lock_guard & cache_lock) } catch (...) 
{ - if (!downloader_id.empty() && downloader_id == getCallerIdImpl()) + if (!downloader_id.empty() && isDownloaderImpl(segment_lock)) downloader_id.clear(); cv.notify_all(); @@ -561,7 +575,7 @@ void FileSegment::completeImpl(std::lock_guard & cache_lock, std::lo } } - if (!downloader_id.empty() && (downloader_id == getCallerIdImpl() || is_last_holder)) + if (!downloader_id.empty() && (isDownloaderImpl(segment_lock) || is_last_holder)) { LOG_TEST(log, "Clearing downloader id: {}, current state: {}", downloader_id, stateToString(download_state)); downloader_id.clear(); diff --git a/src/Common/FileSegment.h b/src/Common/FileSegment.h index 8439389fdeb..e3011eeb0fa 100644 --- a/src/Common/FileSegment.h +++ b/src/Common/FileSegment.h @@ -154,6 +154,7 @@ private: void setDownloaded(std::lock_guard & segment_lock); void setDownloadFailed(std::lock_guard & segment_lock); + bool isDownloaderImpl(std::lock_guard & segment_lock) const; void wrapWithCacheInfo(Exception & e, const String & message, std::lock_guard & segment_lock) const; diff --git a/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp b/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp index 90a1d4ad43e..e6188a96a33 100644 --- a/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp +++ b/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp @@ -613,7 +613,10 @@ bool CachedReadBufferFromRemoteFS::nextImplStep() { bool need_complete_file_segment = file_segment->isDownloader(); if (need_complete_file_segment) + { + LOG_TEST(log, "Resetting downloader {} from scope exit", file_segment->getDownloader()); file_segment->completeBatchAndResetDownloader(); + } } catch (...) { From 428753a4a77135ad76a2b5f76f0b9c38bb5e74b6 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 14 Apr 2022 14:39:36 +0200 Subject: [PATCH 059/110] Fix integration tests parser --- tests/integration/ci-runner.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/integration/ci-runner.py b/tests/integration/ci-runner.py index 05e56d2a910..a047c6c114c 100755 --- a/tests/integration/ci-runner.py +++ b/tests/integration/ci-runner.py @@ -461,6 +461,11 @@ class ClickhouseIntegrationTestsRunner: if test not in main_counters[state]: main_counters[state].append(test) + for state in ("SKIPPED",): + for test in current_counters[state]: + main_counters[state].append(test) + + def _get_runner_image_cmd(self, repo_path): image_cmd = "" if self._can_run_with( From c74b6c547b6638648e1df14a6dd5478d993c5333 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 14 Apr 2022 14:40:56 +0200 Subject: [PATCH 060/110] Black --- tests/integration/ci-runner.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/integration/ci-runner.py b/tests/integration/ci-runner.py index a047c6c114c..687c88b04a3 100755 --- a/tests/integration/ci-runner.py +++ b/tests/integration/ci-runner.py @@ -465,7 +465,6 @@ class ClickhouseIntegrationTestsRunner: for test in current_counters[state]: main_counters[state].append(test) - def _get_runner_image_cmd(self, repo_path): image_cmd = "" if self._can_run_with( From 9947008d31f7b5031ca8170438853cb0f2e7be23 Mon Sep 17 00:00:00 2001 From: zhanghuajie <310336422@qq.com> Date: Thu, 14 Apr 2022 22:49:49 +0800 Subject: [PATCH 061/110] fix crash when you use clickhouse-git-import with invalid parameter for example: clickhouse-git-import -xxx --- programs/compressor/Compressor.cpp | 64 +++++++++++++++--------------- programs/format/Format.cpp | 64 +++++++++++++++--------------- programs/git-import/git-import.cpp | 2 +- 3 files changed, 65 insertions(+), 65 deletions(-) diff --git 
a/programs/compressor/Compressor.cpp b/programs/compressor/Compressor.cpp index d47372631fe..d0fc3528473 100644 --- a/programs/compressor/Compressor.cpp +++ b/programs/compressor/Compressor.cpp @@ -66,40 +66,40 @@ int mainEntryClickHouseCompressor(int argc, char ** argv) using namespace DB; namespace po = boost::program_options; - po::options_description desc = createOptionsDescription("Allowed options", getTerminalWidth()); - desc.add_options() - ("help,h", "produce help message") - ("input", po::value()->value_name("INPUT"), "input file") - ("output", po::value()->value_name("OUTPUT"), "output file") - ("decompress,d", "decompress") - ("offset-in-compressed-file", po::value()->default_value(0ULL), "offset to the compressed block (i.e. physical file offset)") - ("offset-in-decompressed-block", po::value()->default_value(0ULL), "offset to the decompressed block (i.e. virtual offset)") - ("block-size,b", po::value()->default_value(DBMS_DEFAULT_BUFFER_SIZE), "compress in blocks of specified size") - ("hc", "use LZ4HC instead of LZ4") - ("zstd", "use ZSTD instead of LZ4") - ("codec", po::value>()->multitoken(), "use codecs combination instead of LZ4") - ("level", po::value(), "compression level for codecs specified via flags") - ("none", "use no compression instead of LZ4") - ("stat", "print block statistics of compressed data") - ; - - po::positional_options_description positional_desc; - positional_desc.add("input", 1); - positional_desc.add("output", 1); - - po::variables_map options; - po::store(po::command_line_parser(argc, argv).options(desc).positional(positional_desc).run(), options); - - if (options.count("help")) - { - std::cout << "Usage: " << argv[0] << " [options] < INPUT > OUTPUT" << std::endl; - std::cout << "Usage: " << argv[0] << " [options] INPUT OUTPUT" << std::endl; - std::cout << desc << std::endl; - return 0; - } - try { + po::options_description desc = createOptionsDescription("Allowed options", getTerminalWidth()); + desc.add_options() + ("help,h", "produce help message") + ("input", po::value()->value_name("INPUT"), "input file") + ("output", po::value()->value_name("OUTPUT"), "output file") + ("decompress,d", "decompress") + ("offset-in-compressed-file", po::value()->default_value(0ULL), "offset to the compressed block (i.e. physical file offset)") + ("offset-in-decompressed-block", po::value()->default_value(0ULL), "offset to the decompressed block (i.e. 
virtual offset)") + ("block-size,b", po::value()->default_value(DBMS_DEFAULT_BUFFER_SIZE), "compress in blocks of specified size") + ("hc", "use LZ4HC instead of LZ4") + ("zstd", "use ZSTD instead of LZ4") + ("codec", po::value>()->multitoken(), "use codecs combination instead of LZ4") + ("level", po::value(), "compression level for codecs specified via flags") + ("none", "use no compression instead of LZ4") + ("stat", "print block statistics of compressed data") + ; + + po::positional_options_description positional_desc; + positional_desc.add("input", 1); + positional_desc.add("output", 1); + + po::variables_map options; + po::store(po::command_line_parser(argc, argv).options(desc).positional(positional_desc).run(), options); + + if (options.count("help")) + { + std::cout << "Usage: " << argv[0] << " [options] < INPUT > OUTPUT" << std::endl; + std::cout << "Usage: " << argv[0] << " [options] INPUT OUTPUT" << std::endl; + std::cout << desc << std::endl; + return 0; + } + bool decompress = options.count("decompress"); bool use_lz4hc = options.count("hc"); bool use_zstd = options.count("zstd"); diff --git a/programs/format/Format.cpp b/programs/format/Format.cpp index 50d85cdd43d..d5206da00f5 100644 --- a/programs/format/Format.cpp +++ b/programs/format/Format.cpp @@ -44,40 +44,40 @@ int mainEntryClickHouseFormat(int argc, char ** argv) { using namespace DB; - boost::program_options::options_description desc = createOptionsDescription("Allowed options", getTerminalWidth()); - desc.add_options() - ("query", po::value(), "query to format") - ("help,h", "produce help message") - ("hilite", "add syntax highlight with ANSI terminal escape sequences") - ("oneline", "format in single line") - ("quiet,q", "just check syntax, no output on success") - ("multiquery,n", "allow multiple queries in the same file") - ("obfuscate", "obfuscate instead of formatting") - ("backslash", "add a backslash at the end of each line of the formatted query") - ("allow_settings_after_format_in_insert", "Allow SETTINGS after FORMAT, but note, that this is not always safe") - ("seed", po::value(), "seed (arbitrary string) that determines the result of obfuscation") - ; - - Settings cmd_settings; - for (const auto & field : cmd_settings.all()) - { - if (field.getName() == "max_parser_depth" || field.getName() == "max_query_size") - cmd_settings.addProgramOption(desc, field); - } - - boost::program_options::variables_map options; - boost::program_options::store(boost::program_options::parse_command_line(argc, argv, desc), options); - po::notify(options); - - if (options.count("help")) - { - std::cout << "Usage: " << argv[0] << " [options] < query" << std::endl; - std::cout << desc << std::endl; - return 1; - } - try { + boost::program_options::options_description desc = createOptionsDescription("Allowed options", getTerminalWidth()); + desc.add_options() + ("query", po::value(), "query to format") + ("help,h", "produce help message") + ("hilite", "add syntax highlight with ANSI terminal escape sequences") + ("oneline", "format in single line") + ("quiet,q", "just check syntax, no output on success") + ("multiquery,n", "allow multiple queries in the same file") + ("obfuscate", "obfuscate instead of formatting") + ("backslash", "add a backslash at the end of each line of the formatted query") + ("allow_settings_after_format_in_insert", "Allow SETTINGS after FORMAT, but note, that this is not always safe") + ("seed", po::value(), "seed (arbitrary string) that determines the result of obfuscation") + ; + + Settings 
cmd_settings; + for (const auto & field : cmd_settings.all()) + { + if (field.getName() == "max_parser_depth" || field.getName() == "max_query_size") + cmd_settings.addProgramOption(desc, field); + } + + boost::program_options::variables_map options; + boost::program_options::store(boost::program_options::parse_command_line(argc, argv, desc), options); + po::notify(options); + + if (options.count("help")) + { + std::cout << "Usage: " << argv[0] << " [options] < query" << std::endl; + std::cout << desc << std::endl; + return 1; + } + bool hilite = options.count("hilite"); bool oneline = options.count("oneline"); bool quiet = options.count("quiet"); diff --git a/programs/git-import/git-import.cpp b/programs/git-import/git-import.cpp index 749dcbfee5f..18a9bb2627c 100644 --- a/programs/git-import/git-import.cpp +++ b/programs/git-import/git-import.cpp @@ -1231,5 +1231,5 @@ try catch (...) { std::cerr << DB::getCurrentExceptionMessage(true) << '\n'; - throw; + return DB::getCurrentExceptionCode(); } From 52a7544aa48d8978b072b991d612e557a3ff20e4 Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Wed, 13 Apr 2022 18:48:56 +0200 Subject: [PATCH 062/110] Fix cancel-lambda for closed PRs - Create a fallback function for some edge cases - Process synchronized PRs as help for cancel-workflow action --- .../cancel_and_rerun_workflow_lambda/app.py | 114 +++++++++++++++--- 1 file changed, 94 insertions(+), 20 deletions(-) diff --git a/tests/ci/cancel_and_rerun_workflow_lambda/app.py b/tests/ci/cancel_and_rerun_workflow_lambda/app.py index 4cce1e5024a..663d9100889 100644 --- a/tests/ci/cancel_and_rerun_workflow_lambda/app.py +++ b/tests/ci/cancel_and_rerun_workflow_lambda/app.py @@ -92,28 +92,33 @@ WorkflowDescription = namedtuple( ) -def get_workflows_description_for_pull_request(pull_request_event): +def get_workflows_description_for_pull_request( + pull_request_event, +) -> List[WorkflowDescription]: head_repo = pull_request_event["head"]["repo"]["full_name"] head_branch = pull_request_event["head"]["ref"] - head_sha = pull_request_event["head"]["sha"] print("PR", pull_request_event["number"], "has head ref", head_branch) - workflows_data = [] - workflows = _exec_get_with_retry( - API_URL + f"/actions/runs?branch={head_branch}&event=pull_request&page=1" - ) - workflows_data += workflows["workflow_runs"] - i = 2 - while len(workflows["workflow_runs"]) > 0: - workflows = _exec_get_with_retry( - API_URL + f"/actions/runs?branch={head_branch}&event=pull_request&page={i}" - ) - workflows_data += workflows["workflow_runs"] - i += 1 - if i > 30: - print("Too many workflows found") - break - DEBUG_INFO["workflows"] = [] # type: List[Dict[str, str]] + workflows_data = [] + request_url = f"{API_URL}/actions/runs?per_page=100" + # Get all workflows for the current branch + for i in range(1, 11): + workflows = _exec_get_with_retry( + f"{request_url}&event=pull_request&branch={head_branch}&page={i}" + ) + if not workflows["workflow_runs"]: + break + workflows_data += workflows["workflow_runs"] + if i == 10: + print("Too many workflows found") + + if not workflows_data: + print("No workflows found by filter") + return [] + + print(f"Total workflows for the branch {head_branch} found: {len(workflows_data)}") + + DEBUG_INFO["workflows"] = [] workflow_descriptions = [] for workflow in workflows_data: # Some time workflow["head_repository"]["full_name"] is None @@ -123,13 +128,13 @@ def get_workflows_description_for_pull_request(pull_request_event): { "full_name": workflow["head_repository"]["full_name"], 
"name": workflow["name"], + "branch": workflow["head_branch"], } ) # unfortunately we cannot filter workflows from forks in request to API # so doing it manually if ( - workflow["head_sha"] == head_sha - and workflow["head_repository"]["full_name"] == head_repo + workflow["head_repository"]["full_name"] == head_repo and workflow["name"] in NEED_RERUN_OR_CANCELL_WORKFLOWS ): workflow_descriptions.append( @@ -144,6 +149,60 @@ def get_workflows_description_for_pull_request(pull_request_event): return workflow_descriptions +def get_workflow_description_fallback(event_data) -> List[WorkflowDescription]: + pull_request_event = event_data["pull_request"] + head_repo = pull_request_event["head"]["repo"]["full_name"] + head_branch = pull_request_event["head"]["ref"] + head_sha = pull_request_event["head"]["sha"] + print("Get last 500 workflows from API to search related there") + # Fallback for a case of an already deleted branch and no workflows received + request_url = f"{API_URL}/actions/runs?per_page=100" + workflows_data = [] + i = 1 + for i in range(1, 6): + workflows = _exec_get_with_retry(f"{request_url}&page={i}") + if not workflows["workflow_runs"]: + break + # Prefilter workflows + workflows_data += [ + wf + for wf in workflows["workflow_runs"] + if wf["head_repository"] is not None + and wf["head_repository"]["full_name"] == head_repo + and wf["head_branch"] == head_branch + and wf["name"] in NEED_RERUN_OR_CANCELL_WORKFLOWS + ] + + print(f"Total workflows in last 500 actions matches: {len(workflows_data)}") + + DEBUG_INFO["workflows"] = [ + { + "full_name": wf["head_repository"]["full_name"], + "name": wf["name"], + "branch": wf["head_branch"], + } + for wf in workflows_data + ] + if event_data["action"] == "synchronize": + print(f"Leave only workflows with SHA but {head_sha} for updated PR") + # Cancel all events with SHA different than current + workflows_data = list( + filter(lambda x: x["head_sha"] != head_sha, workflows_data) + ) + + workflow_descriptions = [ + WorkflowDescription( + run_id=wf["id"], + status=wf["status"], + rerun_url=wf["rerun_url"], + cancel_url=wf["cancel_url"], + ) + for wf in workflows_data + ] + + return workflow_descriptions + + def get_workflow_description(workflow_id): workflow = _exec_get_with_retry(API_URL + f"/actions/runs/{workflow_id}") return WorkflowDescription( @@ -189,6 +248,21 @@ def main(event): if action == "closed" or "do not test" in labels: print("PR merged/closed or manually labeled 'do not test' will kill workflows") workflow_descriptions = get_workflows_description_for_pull_request(pull_request) + workflow_descriptions = ( + workflow_descriptions or get_workflow_description_fallback(event_data) + ) + urls_to_cancel = [] + for workflow_description in workflow_descriptions: + if workflow_description.status != "completed": + urls_to_cancel.append(workflow_description.cancel_url) + print(f"Found {len(urls_to_cancel)} workflows to cancel") + exec_workflow_url(urls_to_cancel, token) + elif action == "synchronize": + print("PR is synchronized, going to stop old actions") + workflow_descriptions = get_workflows_description_for_pull_request(pull_request) + workflow_descriptions = ( + workflow_descriptions or get_workflow_description_fallback(event_data) + ) urls_to_cancel = [] for workflow_description in workflow_descriptions: if workflow_description.status != "completed": From 3e6dc24a7bbc6ac33e447018838dcc676ea8e769 Mon Sep 17 00:00:00 2001 From: "Mikhail f. 
Shiryaev" Date: Thu, 14 Apr 2022 19:35:29 +0200 Subject: [PATCH 063/110] Do not fail the whole lambda if one page failed --- tests/ci/cancel_and_rerun_workflow_lambda/app.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/ci/cancel_and_rerun_workflow_lambda/app.py b/tests/ci/cancel_and_rerun_workflow_lambda/app.py index 663d9100889..e5decd5561b 100644 --- a/tests/ci/cancel_and_rerun_workflow_lambda/app.py +++ b/tests/ci/cancel_and_rerun_workflow_lambda/app.py @@ -160,7 +160,11 @@ def get_workflow_description_fallback(event_data) -> List[WorkflowDescription]: workflows_data = [] i = 1 for i in range(1, 6): - workflows = _exec_get_with_retry(f"{request_url}&page={i}") + try: + workflows = _exec_get_with_retry(f"{request_url}&page={i}") + except Exception as e: + print(f"Exception occured, still continue: {e}") + continue if not workflows["workflow_runs"]: break # Prefilter workflows From 646fd45645951820544e08511728359e662c0c51 Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Thu, 14 Apr 2022 20:41:06 +0200 Subject: [PATCH 064/110] Request unfiltered workflows in threads --- .../cancel_and_rerun_workflow_lambda/app.py | 42 +++++++++++++++---- 1 file changed, 35 insertions(+), 7 deletions(-) diff --git a/tests/ci/cancel_and_rerun_workflow_lambda/app.py b/tests/ci/cancel_and_rerun_workflow_lambda/app.py index e5decd5561b..cb68365d4d6 100644 --- a/tests/ci/cancel_and_rerun_workflow_lambda/app.py +++ b/tests/ci/cancel_and_rerun_workflow_lambda/app.py @@ -2,6 +2,8 @@ from collections import namedtuple from typing import Any, Dict, List +from threading import Thread +from queue import Queue import json import time @@ -25,6 +27,24 @@ MAX_RETRY = 5 DEBUG_INFO = {} # type: Dict[str, Any] +class Worker(Thread): + def __init__(self, request_queue: Queue, ignore_exception: bool = False): + Thread.__init__(self) + self.queue = request_queue + self.ignore_exception = ignore_exception + self.response = {} # type: Dict + + def run(self): + m = self.queue.get() + try: + self.response = _exec_get_with_retry(m) + except Exception as e: + if not self.ignore_exception: + raise + print(f"Exception occured, still continue: {e}") + self.queue.task_done() + + def get_installation_id(jwt_token): headers = { "Authorization": f"Bearer {jwt_token}", @@ -157,20 +177,25 @@ def get_workflow_description_fallback(event_data) -> List[WorkflowDescription]: print("Get last 500 workflows from API to search related there") # Fallback for a case of an already deleted branch and no workflows received request_url = f"{API_URL}/actions/runs?per_page=100" + q = Queue() # type: Queue + workers = [] workflows_data = [] i = 1 for i in range(1, 6): - try: - workflows = _exec_get_with_retry(f"{request_url}&page={i}") - except Exception as e: - print(f"Exception occured, still continue: {e}") + q.put(f"{request_url}&page={i}") + worker = Worker(q, True) + worker.start() + workers.append(worker) + + for worker in workers: + worker.join() + if not worker.response: + # We ignore get errors, so response can be empty continue - if not workflows["workflow_runs"]: - break # Prefilter workflows workflows_data += [ wf - for wf in workflows["workflow_runs"] + for wf in worker.response["workflow_runs"] if wf["head_repository"] is not None and wf["head_repository"]["full_name"] == head_repo and wf["head_branch"] == head_branch @@ -276,6 +301,9 @@ def main(event): elif action == "labeled" and "can be tested" in labels: print("PR marked with can be tested label, rerun workflow") workflow_descriptions = 
get_workflows_description_for_pull_request(pull_request) + workflow_descriptions = ( + workflow_descriptions or get_workflow_description_fallback(event_data) + ) if not workflow_descriptions: print("Not found any workflows") return From fad0aeb0d34ed065b7b68ec1510000519231623d Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Thu, 14 Apr 2022 20:43:49 +0200 Subject: [PATCH 065/110] Fix some linter issues --- tests/ci/team_keys_lambda/app.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/tests/ci/team_keys_lambda/app.py b/tests/ci/team_keys_lambda/app.py index ad153664b86..50ef753001c 100644 --- a/tests/ci/team_keys_lambda/app.py +++ b/tests/ci/team_keys_lambda/app.py @@ -1,12 +1,13 @@ #!/usr/bin/env python3 -import requests import argparse import json from threading import Thread from queue import Queue +import requests # type: ignore + def get_org_team_members(token: str, org: str, team_slug: str) -> tuple: headers = { @@ -37,7 +38,7 @@ def get_members_keys(members: tuple) -> str: self.results.append(f"# {m}\n{response.text}") self.queue.task_done() - q = Queue() + q = Queue() # type: Queue workers = [] for m in members: q.put(m) @@ -61,7 +62,7 @@ def get_members_keys(members: tuple) -> str: def get_token_from_aws() -> str: - import boto3 + import boto3 # type: ignore secret_name = "clickhouse_robot_token" session = boto3.session.Session() @@ -81,6 +82,8 @@ def main(token: str, org: str, team_slug: str) -> str: def handler(event, context): + _ = context + _ = event token = get_token_from_aws() result = { "statusCode": 200, From e68932298036adc855e618514c1d3db7a1b777f0 Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Thu, 14 Apr 2022 20:54:57 +0200 Subject: [PATCH 066/110] Improve logic to decide cancelling workflow --- .../cancel_and_rerun_workflow_lambda/app.py | 22 ++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/tests/ci/cancel_and_rerun_workflow_lambda/app.py b/tests/ci/cancel_and_rerun_workflow_lambda/app.py index cb68365d4d6..27aff07fc0b 100644 --- a/tests/ci/cancel_and_rerun_workflow_lambda/app.py +++ b/tests/ci/cancel_and_rerun_workflow_lambda/app.py @@ -108,7 +108,7 @@ def _exec_get_with_retry(url): WorkflowDescription = namedtuple( - "WorkflowDescription", ["run_id", "status", "rerun_url", "cancel_url"] + "WorkflowDescription", ["run_id", "status", "rerun_url", "cancel_url", "conclusion"] ) @@ -163,6 +163,7 @@ def get_workflows_description_for_pull_request( status=workflow["status"], rerun_url=workflow["rerun_url"], cancel_url=workflow["cancel_url"], + conclusion=workflow["conclusion"], ) ) @@ -225,6 +226,7 @@ def get_workflow_description_fallback(event_data) -> List[WorkflowDescription]: status=wf["status"], rerun_url=wf["rerun_url"], cancel_url=wf["cancel_url"], + conclusion=wf["conclusion"], ) for wf in workflows_data ] @@ -232,13 +234,14 @@ def get_workflow_description_fallback(event_data) -> List[WorkflowDescription]: return workflow_descriptions -def get_workflow_description(workflow_id): +def get_workflow_description(workflow_id) -> WorkflowDescription: workflow = _exec_get_with_retry(API_URL + f"/actions/runs/{workflow_id}") return WorkflowDescription( run_id=workflow["id"], status=workflow["status"], rerun_url=workflow["rerun_url"], cancel_url=workflow["cancel_url"], + conclusion=workflow["conclusion"], ) @@ -282,7 +285,10 @@ def main(event): ) urls_to_cancel = [] for workflow_description in workflow_descriptions: - if workflow_description.status != "completed": + if ( + 
workflow_description.status != "completed" + and workflow_description.conclusion != "cancelled" + ): urls_to_cancel.append(workflow_description.cancel_url) print(f"Found {len(urls_to_cancel)} workflows to cancel") exec_workflow_url(urls_to_cancel, token) @@ -294,7 +300,10 @@ def main(event): ) urls_to_cancel = [] for workflow_description in workflow_descriptions: - if workflow_description.status != "completed": + if ( + workflow_description.status != "completed" + and workflow_description.conclusion != "cancelled" + ): urls_to_cancel.append(workflow_description.cancel_url) print(f"Found {len(urls_to_cancel)} workflows to cancel") exec_workflow_url(urls_to_cancel, token) @@ -311,7 +320,10 @@ def main(event): sorted_workflows = list(sorted(workflow_descriptions, key=lambda x: x.run_id)) most_recent_workflow = sorted_workflows[-1] print("Latest workflow", most_recent_workflow) - if most_recent_workflow.status != "completed": + if ( + most_recent_workflow.status != "completed" + and most_recent_workflow.conclusion != "cancelled" + ): print("Latest workflow is not completed, cancelling") exec_workflow_url([most_recent_workflow.cancel_url], token) print("Cancelled") From 401efa5fd47f2bec48bed57f6ccf0acab13d3a27 Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Thu, 14 Apr 2022 22:59:03 +0200 Subject: [PATCH 067/110] Improve docker building and logging for approve lambda --- tests/ci/workflow_approve_rerun_lambda/Dockerfile | 6 +++--- tests/ci/workflow_approve_rerun_lambda/app.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/ci/workflow_approve_rerun_lambda/Dockerfile b/tests/ci/workflow_approve_rerun_lambda/Dockerfile index f53be71a893..0d50224c51d 100644 --- a/tests/ci/workflow_approve_rerun_lambda/Dockerfile +++ b/tests/ci/workflow_approve_rerun_lambda/Dockerfile @@ -1,13 +1,13 @@ FROM public.ecr.aws/lambda/python:3.9 -# Copy function code -COPY app.py ${LAMBDA_TASK_ROOT} - # Install the function's dependencies using file requirements.txt # from your project folder. COPY requirements.txt . 
RUN pip3 install -r requirements.txt --target "${LAMBDA_TASK_ROOT}" +# Copy function code +COPY app.py ${LAMBDA_TASK_ROOT} + # Set the CMD to your handler (could also be done as a parameter override outside of the Dockerfile) CMD [ "app.handler" ] diff --git a/tests/ci/workflow_approve_rerun_lambda/app.py b/tests/ci/workflow_approve_rerun_lambda/app.py index 98a686fe3f9..27e28dc30e0 100644 --- a/tests/ci/workflow_approve_rerun_lambda/app.py +++ b/tests/ci/workflow_approve_rerun_lambda/app.py @@ -394,7 +394,7 @@ def rerun_workflow(workflow_description, token): def main(event): token = get_token_from_aws() event_data = json.loads(event["body"]) - print("The body received:", event_data) + print("The body received:", event["body"]) workflow_description = get_workflow_description_from_event(event_data) print("Got workflow description", workflow_description) From dac469dfcab4b2cf04727e9fa6ef03368f7537a2 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 15 Apr 2022 02:39:30 +0300 Subject: [PATCH 068/110] Update storing-data.md --- docs/en/operations/storing-data.md | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/docs/en/operations/storing-data.md b/docs/en/operations/storing-data.md index beffd45bcbd..6cda47ab9fb 100644 --- a/docs/en/operations/storing-data.md +++ b/docs/en/operations/storing-data.md @@ -11,10 +11,6 @@ To work with data stored on `Amazon S3` disks use [S3](../engines/table-engines/ To load data from a web server with static files use a disk with type [web](#storing-data-on-webserver). -## Zero-copy Replication {#zero-copy} - -ClickHouse supports zero-copy replication for `S3` and `HDFS` disks, which means that if the data is stored remotely on several machines and needs to be synchronized, then only the metadata is replicated (paths to the data parts), but not the data itself. - ## Configuring HDFS {#configuring-hdfs} [MergeTree](../engines/table-engines/mergetree-family/mergetree.md) and [Log](../engines/table-engines/log-family/log.md) family table engines can store data to HDFS using a disk with type `HDFS`. @@ -316,3 +312,8 @@ When loading files by `endpoint`, they must be loaded into `/store/` p If URL is not reachable on disk load when the server is starting up tables, then all errors are caught. If in this case there were errors, tables can be reloaded (become visible) via `DETACH TABLE table_name` -> `ATTACH TABLE table_name`. If metadata was successfully loaded at server startup, then tables are available straight away. Use [http_max_single_read_retries](../operations/settings/settings.md#http-max-single-read-retries) setting to limit the maximum number of retries during a single HTTP read. + + +## Zero-copy Replication (not ready for production) {#zero-copy} + +ClickHouse supports zero-copy replication for `S3` and `HDFS` disks, which means that if the data is stored remotely on several machines and needs to be synchronized, then only the metadata is replicated (paths to the data parts), but not the data itself. 
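Aside on the cancel-lambda changes in patches 062-064 and 066 above: the reworked lambda pages through the Actions API, stops at the first empty page, manually prefilters runs by repository, branch and workflow name (the API cannot exclude forks for us), and later cancels only runs that are neither completed nor already cancelled. The following is a minimal, self-contained Python sketch of that pattern, not the lambda itself: fetch_page is a stand-in for the patch's _exec_get_with_retry call, and all data, URLs and filter values are illustrative assumptions.

from collections import namedtuple

WorkflowDescription = namedtuple(
    "WorkflowDescription",
    ["run_id", "status", "rerun_url", "cancel_url", "conclusion"],
)


def fetch_page(page):
    """Stand-in for the real paged GET; empty 'workflow_runs' means no more pages."""
    sample = {
        1: [
            {
                "id": 1,
                "status": "in_progress",
                "conclusion": None,
                "rerun_url": "https://example/rerun/1",
                "cancel_url": "https://example/cancel/1",
                "head_branch": "feature",
                "name": "CI",
                "head_repository": {"full_name": "org/repo"},
            }
        ]
    }
    return {"workflow_runs": sample.get(page, [])}


def get_matching_workflows(head_repo, head_branch, allowed_names, max_pages=5):
    descriptions = []
    for page in range(1, max_pages + 1):
        runs = fetch_page(page)["workflow_runs"]
        if not runs:
            break  # the first empty page ends the scan
        for wf in runs:
            # Prefilter manually: skip runs from forks, other branches
            # or workflows we do not manage.
            if (
                wf["head_repository"] is not None
                and wf["head_repository"]["full_name"] == head_repo
                and wf["head_branch"] == head_branch
                and wf["name"] in allowed_names
            ):
                descriptions.append(
                    WorkflowDescription(
                        run_id=wf["id"],
                        status=wf["status"],
                        rerun_url=wf["rerun_url"],
                        cancel_url=wf["cancel_url"],
                        conclusion=wf["conclusion"],
                    )
                )
    return descriptions


if __name__ == "__main__":
    for d in get_matching_workflows("org/repo", "feature", {"CI"}):
        # Cancellation condition as in patch 066: skip runs that already
        # finished or were cancelled earlier.
        if d.status != "completed" and d.conclusion != "cancelled":
            print("would cancel:", d.cancel_url)
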
From efbb29539f62d14c6e620f87b43087964ccb23d1 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 15 Apr 2022 03:13:40 +0200 Subject: [PATCH 069/110] Fix Play UI for some resultsets --- programs/server/play.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/programs/server/play.html b/programs/server/play.html index ef45ee1476b..146316ef61f 100644 --- a/programs/server/play.html +++ b/programs/server/play.html @@ -589,7 +589,7 @@ stats.innerText = `Elapsed: ${seconds} sec, read ${formatted_rows} rows, ${formatted_bytes}.`; /// We can also render graphs if user performed EXPLAIN PIPELINE graph=1 or EXPLAIN AST graph = 1 - if (response.data.length > 3 && response.data[0][0].startsWith("digraph") && document.getElementById('query').value.match(/^\s*EXPLAIN/i)) { + if (response.data.length > 3 && document.getElementById('query').value.match(/^\s*EXPLAIN/i) && typeof(response.data[0][0]) === "string" && response.data[0][0].startsWith("digraph")) { renderGraph(response); } else { renderTable(response); From 40e5ba21a4265560fa2e46c5c90594832bd316f4 Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 15 Apr 2022 11:55:05 +0200 Subject: [PATCH 070/110] Update src/Common/FileSegment.cpp --- src/Common/FileSegment.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Common/FileSegment.cpp b/src/Common/FileSegment.cpp index 3729b87e823..9fe95c0d8cd 100644 --- a/src/Common/FileSegment.cpp +++ b/src/Common/FileSegment.cpp @@ -163,7 +163,7 @@ bool FileSegment::isDownloader() const return getCallerId() == downloader_id; } -bool FileSegment::isDownloaderImpl(std::lock_guard & /* segment+_lock */) const +bool FileSegment::isDownloaderImpl(std::lock_guard & /* segment_lock */) const { return getCallerId() == downloader_id; } From b79d84acf2352f52afeea3dcf8692b9c0e66444b Mon Sep 17 00:00:00 2001 From: ifinik Date: Fri, 15 Apr 2022 23:39:21 +0300 Subject: [PATCH 071/110] =?UTF-8?q?=D0=BE=D0=BF=D0=B5=D1=87=D0=B0=D1=82?= =?UTF-8?q?=D0=BA=D0=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit https://www.google.com/search?q=%D0%BF%D0%BE%D0%BC%D0%B5%D1%89%D0%B0%D0%B5%D1%81%D1%8F+site%3Agithub.com&biw=1920&bih=969&ei=0NZZYv-UEOrhrgS76YvoDw&ved=0ahUKEwj_vK2w9Zb3AhXqsIsKHbv0Av0Q4dUDCA4&uact=5&oq=%D0%BF%D0%BE%D0%BC%D0%B5%D1%89%D0%B0%D0%B5%D1%81%D1%8F+site%3Agithub.com&gs_lcp=Cgdnd3Mtd2l6EANKBAhBGAFKBAhGGABQthhYwxtg7S1oAnAAeACAAUOIAXKSAQEymAEAoAEBwAEB&sclient=gws-wiz --- docs/ru/engines/table-engines/mergetree-family/mergetree.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/engines/table-engines/mergetree-family/mergetree.md b/docs/ru/engines/table-engines/mergetree-family/mergetree.md index c75fa8e92ce..f2a13569c23 100644 --- a/docs/ru/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/ru/engines/table-engines/mergetree-family/mergetree.md @@ -182,7 +182,7 @@ Marks numbers: 0 1 2 3 4 5 6 7 8 Разреженный индекс допускает чтение лишних строк. При чтении одного диапазона первичного ключа, может быть прочитано до `index_granularity * 2` лишних строк в каждом блоке данных. -Разреженный индекс почти всегда помещаеся в оперативную память и позволяет работать с очень большим количеством строк в таблицах. +Разреженный индекс почти всегда помещается в оперативную память и позволяет работать с очень большим количеством строк в таблицах. ClickHouse не требует уникального первичного ключа. Можно вставить много строк с одинаковым первичным ключом. 
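Referring back to patch 064 above (requesting unfiltered workflow pages in threads): each page URL is pushed onto a queue, a Worker thread fetches it, keeps its response, and optionally swallows exceptions, while the main thread joins the workers and merges the non-empty responses. Below is a minimal runnable Python sketch of that fan-out under stated assumptions; fetch() is a dummy replacement for the real GET-with-retry call and the simulated failure is purely illustrative.

from queue import Queue
from threading import Thread


def fetch(url):
    """Dummy stand-in for the real HTTP GET with retries."""
    if url.endswith("page=3"):
        raise RuntimeError("simulated transient failure")
    return {"workflow_runs": [{"url": url}]}


class Worker(Thread):
    def __init__(self, request_queue, ignore_exception=False):
        Thread.__init__(self)
        self.queue = request_queue
        self.ignore_exception = ignore_exception
        self.response = {}

    def run(self):
        url = self.queue.get()
        try:
            self.response = fetch(url)
        except Exception as e:
            if not self.ignore_exception:
                raise
            print(f"Exception occurred, still continue: {e}")
        finally:
            self.queue.task_done()


if __name__ == "__main__":
    q = Queue()
    workers = []
    for page in range(1, 6):
        q.put(f"https://example/api/runs?per_page=100&page={page}")
        w = Worker(q, ignore_exception=True)
        w.start()
        workers.append(w)

    runs = []
    for w in workers:
        w.join()
        if not w.response:
            continue  # errors are ignored, so a worker may have nothing
        runs.extend(w.response["workflow_runs"])
    print(f"collected {len(runs)} runs")
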
From 58fc4e033ae143af6a5802d89787e25dad2c2e8e Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 15 Apr 2022 22:54:08 +0200 Subject: [PATCH 072/110] Remove trash --- programs/server/Server.cpp | 1 + src/Common/getNumberOfPhysicalCPUCores.cpp | 94 ++++++++++------------ 2 files changed, 42 insertions(+), 53 deletions(-) diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index fc9187cb622..0120564e0b1 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -1716,6 +1716,7 @@ int Server::main(const std::vector & /*args*/) return Application::EXIT_OK; } + void Server::createServers( Poco::Util::AbstractConfiguration & config, const std::vector & listen_hosts, diff --git a/src/Common/getNumberOfPhysicalCPUCores.cpp b/src/Common/getNumberOfPhysicalCPUCores.cpp index 1d26b522e64..872a2464eb6 100644 --- a/src/Common/getNumberOfPhysicalCPUCores.cpp +++ b/src/Common/getNumberOfPhysicalCPUCores.cpp @@ -12,75 +12,63 @@ #include #if defined(OS_LINUX) -unsigned getCGroupLimitedCPUCores(unsigned default_cpu_count) +static int readFrom(const char * filename, int default_value) { - // Try to look at cgroups limit if it is available. - auto read_from = [](const char * filename, int default_value) -> int { - std::ifstream infile(filename); - if (!infile.is_open()) - { - return default_value; - } - int idata; - if (infile >> idata) - return idata; - else - return default_value; - }; + std::ifstream infile(filename); + if (!infile.is_open()) + return default_value; + int idata; + if (infile >> idata) + return idata; + else + return default_value; +} +/// Try to look at cgroups limit if it is available. +static unsigned getCGroupLimitedCPUCores(unsigned default_cpu_count) +{ unsigned quota_count = default_cpu_count; - // Return the number of milliseconds per period process is guaranteed to run. - // -1 for no quota - int cgroup_quota = read_from("/sys/fs/cgroup/cpu/cpu.cfs_quota_us", -1); - int cgroup_period = read_from("/sys/fs/cgroup/cpu/cpu.cfs_period_us", -1); + /// Return the number of milliseconds per period process is guaranteed to run. + /// -1 for no quota + int cgroup_quota = readFrom("/sys/fs/cgroup/cpu/cpu.cfs_quota_us", -1); + int cgroup_period = readFrom("/sys/fs/cgroup/cpu/cpu.cfs_period_us", -1); if (cgroup_quota > -1 && cgroup_period > 0) - { quota_count = ceil(static_cast(cgroup_quota) / static_cast(cgroup_period)); - } return std::min(default_cpu_count, quota_count); } -#endif // OS_LINUX +#endif unsigned getNumberOfPhysicalCPUCores() { - static const unsigned number = [] { - unsigned cpu_count = 0; // start with an invalid num + unsigned cpu_count = 0; // start with an invalid num + #if USE_CPUID - do - { - cpu_raw_data_t raw_data; - cpu_id_t data; + cpu_raw_data_t raw_data; + cpu_id_t data; - /// On Xen VMs, libcpuid returns wrong info (zero number of cores). Fallback to alternative method. - /// Also, libcpuid does not support some CPUs like AMD Hygon C86 7151. - if (0 != cpuid_get_raw_data(&raw_data) || 0 != cpu_identify(&raw_data, &data) || data.num_logical_cpus == 0) - { - // Just fallback - break; - } + /// On Xen VMs, libcpuid returns wrong info (zero number of cores). Fallback to alternative method. + /// Also, libcpuid does not support some CPUs like AMD Hygon C86 7151. + /// Also, libcpuid gives strange result on Google Compute Engine VMs. + /// Example: + /// num_cores = 12, /// number of physical cores on current CPU socket + /// total_logical_cpus = 1, /// total number of logical cores on all sockets + /// num_logical_cpus = 24. 
/// number of logical cores on current CPU socket + /// It means two-way hyper-threading (24 / 12), but contradictory, 'total_logical_cpus' == 1. - cpu_count = data.num_cores * data.total_logical_cpus / data.num_logical_cpus; - - /// Also, libcpuid gives strange result on Google Compute Engine VMs. - /// Example: - /// num_cores = 12, /// number of physical cores on current CPU socket - /// total_logical_cpus = 1, /// total number of logical cores on all sockets - /// num_logical_cpus = 24. /// number of logical cores on current CPU socket - /// It means two-way hyper-threading (24 / 12), but contradictory, 'total_logical_cpus' == 1. - } while (false); + if (0 == cpuid_get_raw_data(&raw_data) && 0 == cpu_identify(&raw_data, &data) && data.num_logical_cpus != 0) + cpu_count = data.num_cores * data.total_logical_cpus / data.num_logical_cpus; #endif - /// As a fallback (also for non-x86 architectures) assume there are no hyper-threading on the system. - /// (Actually, only Aarch64 is supported). - if (cpu_count == 0) - cpu_count = std::thread::hardware_concurrency(); + /// As a fallback (also for non-x86 architectures) assume there are no hyper-threading on the system. + /// (Actually, only Aarch64 is supported). + if (cpu_count == 0) + cpu_count = std::thread::hardware_concurrency(); #if defined(OS_LINUX) - /// TODO: add a setting for disabling that, similar to UseContainerSupport in java - cpu_count = getCGroupLimitedCPUCores(cpu_count); -#endif // OS_LINUX - return cpu_count; - }(); - return number; + /// TODO: add a setting for disabling that, similar to UseContainerSupport in java + cpu_count = getCGroupLimitedCPUCores(cpu_count); +#endif + + return cpu_count; } From 364f10aff5f0b11a45aeb24eb933f13a87fdef5d Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 15 Apr 2022 22:56:51 +0200 Subject: [PATCH 073/110] Remove trash --- src/Common/getNumberOfPhysicalCPUCores.cpp | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/Common/getNumberOfPhysicalCPUCores.cpp b/src/Common/getNumberOfPhysicalCPUCores.cpp index 872a2464eb6..f415ee58e85 100644 --- a/src/Common/getNumberOfPhysicalCPUCores.cpp +++ b/src/Common/getNumberOfPhysicalCPUCores.cpp @@ -39,7 +39,7 @@ static unsigned getCGroupLimitedCPUCores(unsigned default_cpu_count) } #endif -unsigned getNumberOfPhysicalCPUCores() +static unsigned getNumberOfPhysicalCPUCoresImpl() { unsigned cpu_count = 0; // start with an invalid num @@ -72,3 +72,10 @@ unsigned getNumberOfPhysicalCPUCores() return cpu_count; } + +unsigned getNumberOfPhysicalCPUCores() +{ + /// Calculate once. 
+ static auto res = getNumberOfPhysicalCPUCoresImpl(); + return res; +} From 416fa95b8744825ec1c00c072b20bd404145198a Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 15 Apr 2022 23:57:09 +0200 Subject: [PATCH 074/110] Remove "Arcadia" build system --- src/Disks/AzureBlobStorage/AzureBlobStorageAuth.h | 2 -- src/Disks/AzureBlobStorage/registerDiskAzureBlobStorage.cpp | 2 -- src/IO/ReadBufferFromAzureBlobStorage.cpp | 2 -- src/IO/ReadBufferFromAzureBlobStorage.h | 2 -- src/IO/WriteBufferFromAzureBlobStorage.cpp | 2 -- src/IO/WriteBufferFromAzureBlobStorage.h | 2 -- src/Server/CertificateReloader.h | 4 +--- 7 files changed, 1 insertion(+), 15 deletions(-) diff --git a/src/Disks/AzureBlobStorage/AzureBlobStorageAuth.h b/src/Disks/AzureBlobStorage/AzureBlobStorageAuth.h index 1cef6105d41..048daa7c9dc 100644 --- a/src/Disks/AzureBlobStorage/AzureBlobStorageAuth.h +++ b/src/Disks/AzureBlobStorage/AzureBlobStorageAuth.h @@ -1,8 +1,6 @@ #pragma once -#if !defined(ARCADIA_BUILD) #include -#endif #if USE_AZURE_BLOB_STORAGE diff --git a/src/Disks/AzureBlobStorage/registerDiskAzureBlobStorage.cpp b/src/Disks/AzureBlobStorage/registerDiskAzureBlobStorage.cpp index 243452353d3..128c7534b3c 100644 --- a/src/Disks/AzureBlobStorage/registerDiskAzureBlobStorage.cpp +++ b/src/Disks/AzureBlobStorage/registerDiskAzureBlobStorage.cpp @@ -1,6 +1,4 @@ -#if !defined(ARCADIA_BUILD) #include -#endif #include diff --git a/src/IO/ReadBufferFromAzureBlobStorage.cpp b/src/IO/ReadBufferFromAzureBlobStorage.cpp index 0ce6db97437..593bd01023a 100644 --- a/src/IO/ReadBufferFromAzureBlobStorage.cpp +++ b/src/IO/ReadBufferFromAzureBlobStorage.cpp @@ -1,6 +1,4 @@ -#if !defined(ARCADIA_BUILD) #include -#endif #if USE_AZURE_BLOB_STORAGE diff --git a/src/IO/ReadBufferFromAzureBlobStorage.h b/src/IO/ReadBufferFromAzureBlobStorage.h index 78d973747ba..d743e725bda 100644 --- a/src/IO/ReadBufferFromAzureBlobStorage.h +++ b/src/IO/ReadBufferFromAzureBlobStorage.h @@ -1,8 +1,6 @@ #pragma once -#if !defined(ARCADIA_BUILD) #include -#endif #if USE_AZURE_BLOB_STORAGE diff --git a/src/IO/WriteBufferFromAzureBlobStorage.cpp b/src/IO/WriteBufferFromAzureBlobStorage.cpp index 88882fcef65..0f8cfdf347d 100644 --- a/src/IO/WriteBufferFromAzureBlobStorage.cpp +++ b/src/IO/WriteBufferFromAzureBlobStorage.cpp @@ -1,6 +1,4 @@ -#if !defined(ARCADIA_BUILD) #include -#endif #if USE_AZURE_BLOB_STORAGE diff --git a/src/IO/WriteBufferFromAzureBlobStorage.h b/src/IO/WriteBufferFromAzureBlobStorage.h index cbbfb577a91..75336c497eb 100644 --- a/src/IO/WriteBufferFromAzureBlobStorage.h +++ b/src/IO/WriteBufferFromAzureBlobStorage.h @@ -1,8 +1,6 @@ #pragma once -#if !defined(ARCADIA_BUILD) #include -#endif #if USE_AZURE_BLOB_STORAGE diff --git a/src/Server/CertificateReloader.h b/src/Server/CertificateReloader.h index 88c732c2db6..f984b4e4691 100644 --- a/src/Server/CertificateReloader.h +++ b/src/Server/CertificateReloader.h @@ -1,8 +1,6 @@ #pragma once -#if !defined(ARCADIA_BUILD) -# include -#endif +#include #if USE_SSL From a0fe29e2433147756614133b1805703288aa077c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 15 Apr 2022 23:59:49 +0200 Subject: [PATCH 075/110] Remove Arcadia --- src/Interpreters/ActionsVisitor.cpp | 18 ++++++++++++++++++ src/Interpreters/ActionsVisitor.h | 27 --------------------------- 2 files changed, 18 insertions(+), 27 deletions(-) diff --git a/src/Interpreters/ActionsVisitor.cpp b/src/Interpreters/ActionsVisitor.cpp index 181ac9aed7e..3bad8fba270 100644 --- a/src/Interpreters/ActionsVisitor.cpp +++ 
b/src/Interpreters/ActionsVisitor.cpp @@ -252,6 +252,17 @@ static Block createBlockFromAST(const ASTPtr & node, const DataTypes & types, Co return header.cloneWithColumns(std::move(columns)); } + +namespace +{ + +/** Create a block for set from expression. + * 'set_element_types' - types of what are on the left hand side of IN. + * 'right_arg' - list of values: 1, 2, 3 or list of tuples: (1, 2), (3, 4), (5, 6). + * + * We need special implementation for ASTFunction, because in case, when we interpret + * large tuple or array as function, `evaluateConstantExpression` works extremely slow. + */ Block createBlockForSet( const DataTypePtr & left_arg_type, const ASTPtr & right_arg, @@ -295,6 +306,10 @@ Block createBlockForSet( return block; } +/** Create a block for set from literal. + * 'set_element_types' - types of what are on the left hand side of IN. + * 'right_arg' - Literal - Tuple or Array. + */ Block createBlockForSet( const DataTypePtr & left_arg_type, const std::shared_ptr & right_arg, @@ -346,6 +361,9 @@ Block createBlockForSet( return createBlockFromAST(elements_ast, set_element_types, context); } +} + + SetPtr makeExplicitSet( const ASTFunction * node, const ActionsDAG & actions, bool create_ordered_set, ContextPtr context, const SizeLimits & size_limits, PreparedSets & prepared_sets) diff --git a/src/Interpreters/ActionsVisitor.h b/src/Interpreters/ActionsVisitor.h index 342cc9eef9d..d1558cb961c 100644 --- a/src/Interpreters/ActionsVisitor.h +++ b/src/Interpreters/ActionsVisitor.h @@ -27,33 +27,6 @@ SetPtr makeExplicitSet( const ASTFunction * node, const ActionsDAG & actions, bool create_ordered_set, ContextPtr context, const SizeLimits & limits, PreparedSets & prepared_sets); -/** Create a block for set from expression. - * 'set_element_types' - types of what are on the left hand side of IN. - * 'right_arg' - list of values: 1, 2, 3 or list of tuples: (1, 2), (3, 4), (5, 6). - * - * We need special implementation for ASTFunction, because in case, when we interpret - * large tuple or array as function, `evaluateConstantExpression` works extremely slow. - * - * Note: this and following functions are used in third-party applications in Arcadia, so - * they should be declared in header file. - * - */ -Block createBlockForSet( - const DataTypePtr & left_arg_type, - const std::shared_ptr & right_arg, - const DataTypes & set_element_types, - ContextPtr context); - -/** Create a block for set from literal. - * 'set_element_types' - types of what are on the left hand side of IN. - * 'right_arg' - Literal - Tuple or Array. - */ -Block createBlockForSet( - const DataTypePtr & left_arg_type, - const ASTPtr & right_arg, - const DataTypes & set_element_types, - ContextPtr context); - /** For ActionsVisitor * A stack of ExpressionActions corresponding to nested lambda expressions. * The new action should be added to the highest possible level. 
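On the getNumberOfPhysicalCPUCores rework in patches 072-073 above: the refactored code reads the cgroup CFS quota and period, treats ceil(quota / period) as the container CPU limit when a quota is set, caps the detected core count by it, and computes the whole thing only once behind a function-local static. The sketch below restates that arithmetic in Python purely for illustration; the /sys paths and the -1 "no quota" sentinel come from the patch, everything else (helper names, the lru_cache memoization, the os.cpu_count fallback) is an assumption of this sketch.

import math
import os
from functools import lru_cache


def read_int(path, default):
    """Return the integer stored in `path`, or `default` if it is unreadable."""
    try:
        with open(path) as f:
            return int(f.read().split()[0])
    except (OSError, ValueError, IndexError):
        return default


def cgroup_limited_cpu_cores(default_cpu_count):
    # cfs_quota_us == -1 means "no quota"; the period must be positive.
    quota = read_int("/sys/fs/cgroup/cpu/cpu.cfs_quota_us", -1)
    period = read_int("/sys/fs/cgroup/cpu/cpu.cfs_period_us", -1)
    quota_count = default_cpu_count
    if quota > -1 and period > 0:
        quota_count = math.ceil(quota / period)
    return min(default_cpu_count, quota_count)


@lru_cache(maxsize=None)          # analogue of the C++ function-local static
def number_of_physical_cpu_cores():
    hw = os.cpu_count() or 1      # fallback, like std::thread::hardware_concurrency()
    return cgroup_limited_cpu_cores(hw)


if __name__ == "__main__":
    print(number_of_physical_cpu_cores())
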
From cbeeb7ec4f5d055344973d908c716e5d166ea0b3 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 16 Apr 2022 00:20:47 +0200 Subject: [PATCH 076/110] Remove Arcadia --- src/AggregateFunctions/UniquesHashSet.h | 2 +- src/Common/Config/ConfigProcessor.h | 3 +- src/Common/HashTable/Hash.h | 2 +- src/Common/OptimizedRegularExpression.cpp | 2 +- src/Common/StringUtils/CMakeLists.txt | 2 +- src/Common/ZooKeeper/ZooKeeperCommon.h | 2 +- src/Common/ZooKeeper/examples/CMakeLists.txt | 3 - .../examples/zk_many_watches_reconnect.cpp | 66 --------- src/Common/mysqlxx/mysqlxx/mysqlxx.h | 6 +- src/Common/parseAddress.h | 4 +- src/Functions/DateTimeTransforms.h | 2 +- src/Functions/FunctionFactory.h | 2 +- src/Functions/FunctionMathUnary.h | 2 +- src/Functions/FunctionsEmbeddedDictionaries.h | 2 +- src/Functions/FunctionsExternalDictionaries.h | 2 +- src/Functions/FunctionsVisitParam.h | 2 +- src/Functions/URL/FunctionsURL.h | 2 +- src/Functions/array/arrayEnumerateRanked.h | 2 +- src/Functions/array/arrayIndex.h | 4 +- src/Functions/extractTextFromHTML.cpp | 2 +- src/Functions/timeSlots.cpp | 2 +- src/IO/ReadHelpers.cpp | 2 +- src/IO/tests/gtest_s3_uri.cpp | 34 ++--- ...OptimizeIfWithConstantConditionVisitor.cpp | 2 +- src/Parsers/ASTFunction.cpp | 2 +- src/Parsers/ExpressionElementParsers.cpp | 2 +- .../Impl/JSONAsStringRowInputFormat.cpp | 1 + .../Algorithms/tests/gtest_graphite.cpp | 8 +- .../Transforms/ExpressionTransform.h | 2 +- src/Storages/examples/CMakeLists.txt | 13 -- .../examples/remove_symlink_directory.cpp | 35 ----- .../examples/transform_part_zk_nodes.cpp | 131 ------------------ 32 files changed, 49 insertions(+), 299 deletions(-) delete mode 100644 src/Common/ZooKeeper/examples/zk_many_watches_reconnect.cpp delete mode 100644 src/Storages/examples/remove_symlink_directory.cpp delete mode 100644 src/Storages/examples/transform_part_zk_nodes.cpp diff --git a/src/AggregateFunctions/UniquesHashSet.h b/src/AggregateFunctions/UniquesHashSet.h index 6837803c67d..8648f6e2500 100644 --- a/src/AggregateFunctions/UniquesHashSet.h +++ b/src/AggregateFunctions/UniquesHashSet.h @@ -59,7 +59,7 @@ /** This hash function is not the most optimal, but UniquesHashSet states counted with it, - * stored in many places on disks (in the Yandex.Metrika), so it continues to be used. + * stored in many places on disks (in many companies), so it continues to be used. */ struct UniquesHashSetDefaultHash { diff --git a/src/Common/Config/ConfigProcessor.h b/src/Common/Config/ConfigProcessor.h index 6c642690945..0e1d0facf48 100644 --- a/src/Common/Config/ConfigProcessor.h +++ b/src/Common/Config/ConfigProcessor.h @@ -55,7 +55,7 @@ public: /// 2) Determine the includes file from the config: /path2/metrika.xml /// If this path is not configured, use /etc/metrika.xml /// 3) Replace elements matching the "" pattern with - /// "contents of the yandex/bar element in metrika.xml" + /// "contents of the clickhouse/bar element in metrika.xml" /// 4) If zk_node_cache is non-NULL, replace elements matching the "" pattern with /// "contents of the /bar ZooKeeper node". /// If has_zk_includes is non-NULL and there are such elements, set has_zk_includes to true. @@ -137,4 +137,3 @@ private: }; } - diff --git a/src/Common/HashTable/Hash.h b/src/Common/HashTable/Hash.h index 5dbeeecf96b..3cf8978f418 100644 --- a/src/Common/HashTable/Hash.h +++ b/src/Common/HashTable/Hash.h @@ -13,7 +13,7 @@ * * Example: when we do aggregation by the visitor ID, the performance increase is more than 5 times. 
* This is because of following reasons: - * - in Yandex, visitor identifier is an integer that has timestamp with seconds resolution in lower bits; + * - in Metrica web analytics system, visitor identifier is an integer that has timestamp with seconds resolution in lower bits; * - in typical implementation of standard library, hash function for integers is trivial and just use lower bits; * - traffic is non-uniformly distributed across a day; * - we are using open-addressing linear probing hash tables that are most critical to hash function quality, diff --git a/src/Common/OptimizedRegularExpression.cpp b/src/Common/OptimizedRegularExpression.cpp index 1464923e6ab..da348adbe31 100644 --- a/src/Common/OptimizedRegularExpression.cpp +++ b/src/Common/OptimizedRegularExpression.cpp @@ -28,7 +28,7 @@ void OptimizedRegularExpressionImpl::analyze( * in which all metacharacters are escaped, * and also if there are no '|' outside the brackets, * and also avoid substrings of the form `http://` or `www` and some other - * (this is the hack for typical use case in Yandex.Metrica). + * (this is the hack for typical use case in web analytics applications). */ const char * begin = regexp.data(); const char * pos = begin; diff --git a/src/Common/StringUtils/CMakeLists.txt b/src/Common/StringUtils/CMakeLists.txt index 4eedbf4842b..57c196d335c 100644 --- a/src/Common/StringUtils/CMakeLists.txt +++ b/src/Common/StringUtils/CMakeLists.txt @@ -1,4 +1,4 @@ -# These files are located in separate library, because they are used by Yandex.Metrika code +# These files are located in separate library, because they are used by separate products # in places when no dependency on whole "dbms" library is possible. include("${ClickHouse_SOURCE_DIR}/cmake/dbms_glob_sources.cmake") diff --git a/src/Common/ZooKeeper/ZooKeeperCommon.h b/src/Common/ZooKeeper/ZooKeeperCommon.h index e51bea3f7f8..532488c08f8 100644 --- a/src/Common/ZooKeeper/ZooKeeperCommon.h +++ b/src/Common/ZooKeeper/ZooKeeperCommon.h @@ -45,7 +45,7 @@ struct ZooKeeperResponse : virtual Response using ZooKeeperResponsePtr = std::shared_ptr; -/// Exposed in header file for Yandex.Metrica code. +/// Exposed in header file for some external code. 
struct ZooKeeperRequest : virtual Request { XID xid = 0; diff --git a/src/Common/ZooKeeper/examples/CMakeLists.txt b/src/Common/ZooKeeper/examples/CMakeLists.txt index 8bec951e24f..b449b172605 100644 --- a/src/Common/ZooKeeper/examples/CMakeLists.txt +++ b/src/Common/ZooKeeper/examples/CMakeLists.txt @@ -7,8 +7,5 @@ target_link_libraries(zkutil_test_commands_new_lib PRIVATE clickhouse_common_zoo add_executable(zkutil_test_async zkutil_test_async.cpp) target_link_libraries(zkutil_test_async PRIVATE clickhouse_common_zookeeper_no_log) -add_executable (zk_many_watches_reconnect zk_many_watches_reconnect.cpp) -target_link_libraries (zk_many_watches_reconnect PRIVATE clickhouse_common_zookeeper_no_log clickhouse_common_config) - add_executable (zookeeper_impl zookeeper_impl.cpp) target_link_libraries (zookeeper_impl PRIVATE clickhouse_common_zookeeper_no_log) diff --git a/src/Common/ZooKeeper/examples/zk_many_watches_reconnect.cpp b/src/Common/ZooKeeper/examples/zk_many_watches_reconnect.cpp deleted file mode 100644 index cf819121234..00000000000 --- a/src/Common/ZooKeeper/examples/zk_many_watches_reconnect.cpp +++ /dev/null @@ -1,66 +0,0 @@ -#include -#include -#include -#include - -/// A tool for reproducing https://issues.apache.org/jira/browse/ZOOKEEPER-706 -/// Original libzookeeper can't reconnect the session if the length of SET_WATCHES message -/// exceeds jute.maxbuffer (0xfffff by default). -/// This happens when the number of watches exceeds ~29000. -/// -/// Session reconnect can be caused by forbidding packets to the current zookeeper server, e.g. -/// sudo ip6tables -A OUTPUT -d mtzoo01it.haze.yandex.net -j REJECT - -const size_t N_THREADS = 100; - -int main(int argc, char ** argv) -{ - try - { - if (argc != 3) - { - std::cerr << "usage: " << argv[0] << " " << std::endl; - return 3; - } - - DB::ConfigProcessor processor(argv[1], false, true); - auto config = processor.loadConfig().configuration; - zkutil::ZooKeeper zk(*config, "zookeeper", nullptr); - zkutil::EventPtr watch = std::make_shared(); - - /// NOTE: setting watches in multiple threads because doing it in a single thread is too slow. - size_t watches_per_thread = std::stoull(argv[2]) / N_THREADS; - std::vector threads; - for (size_t i_thread = 0; i_thread < N_THREADS; ++i_thread) - { - threads.emplace_back([&, i_thread] - { - for (size_t i = 0; i < watches_per_thread; ++i) - zk.exists("/clickhouse/nonexistent_node" + std::to_string(i * N_THREADS + i_thread), nullptr, watch); - }); - } - for (size_t i_thread = 0; i_thread < N_THREADS; ++i_thread) - threads[i_thread].join(); - - while (true) - { - std::cerr << "WAITING..." << std::endl; - sleep(10); - } - } - catch (Poco::Exception & e) - { - std::cerr << "Exception: " << e.displayText() << std::endl; - return 1; - } - catch (std::exception & e) - { - std::cerr << "std::exception: " << e.what() << std::endl; - return 3; - } - catch (...) - { - std::cerr << "Some exception" << std::endl; - return 2; - } -} diff --git a/src/Common/mysqlxx/mysqlxx/mysqlxx.h b/src/Common/mysqlxx/mysqlxx/mysqlxx.h index 785d4361fd7..0b6cf5e7c1a 100644 --- a/src/Common/mysqlxx/mysqlxx/mysqlxx.h +++ b/src/Common/mysqlxx/mysqlxx/mysqlxx.h @@ -10,10 +10,10 @@ /** 'mysqlxx' - very simple library for replacement of 'mysql++' library. * - * For whatever reason, in Yandex.Metrica, back in 2008, 'mysql++' library was used. + * For whatever reason, in Metrica web analytics system, back in 2008, 'mysql++' library was used. * There are the following shortcomings of 'mysql++': * 1. 
Too rich functionality: most of it is not used. - * 2. Low performance (when used for Yandex.Metrica). + * 2. Low performance (when used for Metrica). * * Low performance is caused by the following reasons: * @@ -50,7 +50,7 @@ * And for the sake of simplicity, some functions work only with certain assumptions, * or with slightly different semantic than in mysql++. * And we don't care about cross-platform usage of mysqlxx. - * These assumptions are specific for Yandex.Metrica. Your mileage may vary. + * These assumptions are specific for Metrica. Your mileage may vary. * * mysqlxx could not be considered as separate full-featured library, * because it is developed from the principle - "everything that we don't need is not implemented". diff --git a/src/Common/parseAddress.h b/src/Common/parseAddress.h index 86340982c67..297934a6379 100644 --- a/src/Common/parseAddress.h +++ b/src/Common/parseAddress.h @@ -13,8 +13,8 @@ namespace DB * Otherwise, an exception is thrown. * * Examples: - * yandex.ru - returns "yandex.ru" and default_port - * yandex.ru:80 - returns "yandex.ru" and 80 + * clickhouse.com - returns "clickhouse.com" and default_port + * clickhouse.com:80 - returns "clickhouse.com" and 80 * [2a02:6b8:a::a]:80 - returns [2a02:6b8:a::a] and 80; note that square brackets remain in returned host. */ std::pair parseAddress(const std::string & str, UInt16 default_port); diff --git a/src/Functions/DateTimeTransforms.h b/src/Functions/DateTimeTransforms.h index 3a7baf45adc..5d1bcaf48cf 100644 --- a/src/Functions/DateTimeTransforms.h +++ b/src/Functions/DateTimeTransforms.h @@ -539,7 +539,7 @@ struct ToStartOfFifteenMinutesImpl using FactorTransform = ZeroTransform; }; -/// Round to start of half-an-hour length interval with unspecified offset. This transform is specific for Yandex.Metrica. +/// Round to start of half-an-hour length interval with unspecified offset. This transform is specific for Metrica web analytics system. struct TimeSlotImpl { static constexpr auto name = "timeSlot"; diff --git a/src/Functions/FunctionFactory.h b/src/Functions/FunctionFactory.h index 898a463cd58..13b14559ec4 100644 --- a/src/Functions/FunctionFactory.h +++ b/src/Functions/FunctionFactory.h @@ -40,7 +40,7 @@ public: registerFunction(name, &Function::create, case_sensitiveness); } - /// This function is used by YQL - internal Yandex product that depends on ClickHouse by source code. + /// This function is used by YQL - innovative transactional DBMS that depends on ClickHouse by source code. std::vector getAllNames() const; bool has(const std::string & name) const; diff --git a/src/Functions/FunctionMathUnary.h b/src/Functions/FunctionMathUnary.h index fa10c004e87..bd656db792b 100644 --- a/src/Functions/FunctionMathUnary.h +++ b/src/Functions/FunctionMathUnary.h @@ -10,7 +10,7 @@ #include "config_functions.h" -/** FastOps is a fast vector math library from Mikhail Parakhin (former Yandex CTO), +/** FastOps is a fast vector math library from Mikhail Parakhin, https://www.linkedin.com/in/mikhail-parakhin/ * Enabled by default. 
*/ #if USE_FASTOPS diff --git a/src/Functions/FunctionsEmbeddedDictionaries.h b/src/Functions/FunctionsEmbeddedDictionaries.h index c6ea886b4a8..20be3ee3cce 100644 --- a/src/Functions/FunctionsEmbeddedDictionaries.h +++ b/src/Functions/FunctionsEmbeddedDictionaries.h @@ -33,7 +33,7 @@ namespace ErrorCodes extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; } -/** Functions using Yandex.Metrica dictionaries +/** Functions using deprecated dictionaries * - dictionaries of regions, operating systems, search engines. * * Climb up the tree to a certain level. diff --git a/src/Functions/FunctionsExternalDictionaries.h b/src/Functions/FunctionsExternalDictionaries.h index 6a701d7b864..189ec7321c1 100644 --- a/src/Functions/FunctionsExternalDictionaries.h +++ b/src/Functions/FunctionsExternalDictionaries.h @@ -195,7 +195,7 @@ public: * that is the initiator of a distributed query, * in the case when the function will be invoked for real data only at the remote servers. * This feature is controversial and implemented specially - * for backward compatibility with the case in Yandex Banner System. + * for backward compatibility with the case in the Banner System application. */ if (input_rows_count == 0) return result_type->createColumn(); diff --git a/src/Functions/FunctionsVisitParam.h b/src/Functions/FunctionsVisitParam.h index 362c3bcd693..09fcf8659ed 100644 --- a/src/Functions/FunctionsVisitParam.h +++ b/src/Functions/FunctionsVisitParam.h @@ -12,7 +12,7 @@ /** Functions for retrieving "visit parameters". - * Visit parameters in Yandex.Metrika are a special kind of JSONs. + * Visit parameters in Metrica web analytics system are a special kind of JSONs. * These functions are applicable to almost any JSONs. * Implemented via templates from FunctionsStringSearch.h. * diff --git a/src/Functions/URL/FunctionsURL.h b/src/Functions/URL/FunctionsURL.h index 297b62ca256..a0f106742fb 100644 --- a/src/Functions/URL/FunctionsURL.h +++ b/src/Functions/URL/FunctionsURL.h @@ -45,7 +45,7 @@ namespace DB * Remove specified parameter from URL. * cutURLParameter(URL, name) * - * Get array of URL 'hierarchy' as in Yandex.Metrica tree-like reports. See docs. + * Get array of URL 'hierarchy' as in web-analytics tree-like reports. See the docs. * URLHierarchy(URL) */ diff --git a/src/Functions/array/arrayEnumerateRanked.h b/src/Functions/array/arrayEnumerateRanked.h index d6a62a966ae..ac3936af0fd 100644 --- a/src/Functions/array/arrayEnumerateRanked.h +++ b/src/Functions/array/arrayEnumerateRanked.h @@ -15,7 +15,7 @@ // for better debug: #include /** The function will enumerate distinct values of the passed multidimensional arrays looking inside at the specified depths. - * This is very unusual function made as a special order for Yandex.Metrica. + * This is very unusual function made as a special order for our dear customer - Metrica web analytics system. * * arrayEnumerateUniqRanked(['hello', 'world', 'hello']) = [1, 1, 2] * - it returns similar structured array containing number of occurrence of the corresponding value. diff --git a/src/Functions/array/arrayIndex.h b/src/Functions/array/arrayIndex.h index 8b42b99cd69..0dbbe5e41b6 100644 --- a/src/Functions/array/arrayIndex.h +++ b/src/Functions/array/arrayIndex.h @@ -718,9 +718,7 @@ private: /** * Catches arguments of type LowCardinality(T) (left) and U (right). 
* - * The perftests - * https://clickhouse-test-reports.s3.yandex.net/12550/2d27fa0fa8c198a82bf1fe3625050ccf56695976/integration_tests_(release).html - * showed that the amount of action needed to convert the non-constant right argument to the index column + * The perftests showed that the amount of action needed to convert the non-constant right argument to the index column * (similar to the left one's) is significantly higher than converting the array itself to an ordinary column. * * So, in terms of performance it's more optimal to fall back to default implementation and catch only constant diff --git a/src/Functions/extractTextFromHTML.cpp b/src/Functions/extractTextFromHTML.cpp index b38ea74d6ce..f321a59f734 100644 --- a/src/Functions/extractTextFromHTML.cpp +++ b/src/Functions/extractTextFromHTML.cpp @@ -53,7 +53,7 @@ * * Usage example: * - * SELECT extractTextFromHTML(html) FROM url('https://yandex.ru/', RawBLOB, 'html String') + * SELECT extractTextFromHTML(html) FROM url('https://github.com/ClickHouse/ClickHouse', RawBLOB, 'html String') * * - ClickHouse has embedded web browser. */ diff --git a/src/Functions/timeSlots.cpp b/src/Functions/timeSlots.cpp index 34af410befa..a19ccf62565 100644 --- a/src/Functions/timeSlots.cpp +++ b/src/Functions/timeSlots.cpp @@ -29,7 +29,7 @@ namespace * For example, timeSlots(toDateTime('2012-01-01 12:20:00'), 600) = [toDateTime('2012-01-01 12:00:00'), toDateTime('2012-01-01 12:30:00')]. * This is necessary to search for hits that are part of the corresponding visit. * - * This is obsolete function. It was developed for Yandex.Metrica, but no longer used in Yandex. + * This is obsolete function. It was developed for Metrica web analytics system, but the art of its usage has been forgotten. * But this function was adopted by wider audience. 
*/ diff --git a/src/IO/ReadHelpers.cpp b/src/IO/ReadHelpers.cpp index bf3cccccab8..8aee5713cdc 100644 --- a/src/IO/ReadHelpers.cpp +++ b/src/IO/ReadHelpers.cpp @@ -343,7 +343,7 @@ static void parseComplexEscapeSequence(Vector & s, ReadBuffer & buf) && decoded_char != '"' && decoded_char != '`' /// MySQL style identifiers && decoded_char != '/' /// JavaScript in HTML - && decoded_char != '=' /// Yandex's TSKV + && decoded_char != '=' /// TSKV format invented somewhere && !isControlASCII(decoded_char)) { s.push_back('\\'); diff --git a/src/IO/tests/gtest_s3_uri.cpp b/src/IO/tests/gtest_s3_uri.cpp index 7ee72069e57..20d19437c64 100644 --- a/src/IO/tests/gtest_s3_uri.cpp +++ b/src/IO/tests/gtest_s3_uri.cpp @@ -16,36 +16,36 @@ class S3UriTest : public testing::TestWithParam TEST(S3UriTest, validPatterns) { { - S3::URI uri(Poco::URI("https://jokserfn.s3.yandexcloud.net/")); - ASSERT_EQ("https://s3.yandexcloud.net", uri.endpoint); + S3::URI uri(Poco::URI("https://jokserfn.s3.amazonaws.com/")); + ASSERT_EQ("https://s3.amazonaws.com", uri.endpoint); ASSERT_EQ("jokserfn", uri.bucket); ASSERT_EQ("", uri.key); ASSERT_EQ(true, uri.is_virtual_hosted_style); } { - S3::URI uri(Poco::URI("https://s3.yandexcloud.net/jokserfn/")); - ASSERT_EQ("https://s3.yandexcloud.net", uri.endpoint); + S3::URI uri(Poco::URI("https://s3.amazonaws.com/jokserfn/")); + ASSERT_EQ("https://s3.amazonaws.com", uri.endpoint); ASSERT_EQ("jokserfn", uri.bucket); ASSERT_EQ("", uri.key); ASSERT_EQ(false, uri.is_virtual_hosted_style); } { - S3::URI uri(Poco::URI("https://yandexcloud.net/bucket/")); - ASSERT_EQ("https://yandexcloud.net", uri.endpoint); + S3::URI uri(Poco::URI("https://amazonaws.com/bucket/")); + ASSERT_EQ("https://amazonaws.com", uri.endpoint); ASSERT_EQ("bucket", uri.bucket); ASSERT_EQ("", uri.key); ASSERT_EQ(false, uri.is_virtual_hosted_style); } { - S3::URI uri(Poco::URI("https://jokserfn.s3.yandexcloud.net/data")); - ASSERT_EQ("https://s3.yandexcloud.net", uri.endpoint); + S3::URI uri(Poco::URI("https://jokserfn.s3.amazonaws.com/data")); + ASSERT_EQ("https://s3.amazonaws.com", uri.endpoint); ASSERT_EQ("jokserfn", uri.bucket); ASSERT_EQ("data", uri.key); ASSERT_EQ(true, uri.is_virtual_hosted_style); } { - S3::URI uri(Poco::URI("https://storage.yandexcloud.net/jokserfn/data")); - ASSERT_EQ("https://storage.yandexcloud.net", uri.endpoint); + S3::URI uri(Poco::URI("https://storage.amazonaws.com/jokserfn/data")); + ASSERT_EQ("https://storage.amazonaws.com", uri.endpoint); ASSERT_EQ("jokserfn", uri.bucket); ASSERT_EQ("data", uri.key); ASSERT_EQ(false, uri.is_virtual_hosted_style); @@ -97,13 +97,13 @@ INSTANTIATE_TEST_SUITE_P( S3UriTest, testing::Values( "https:///", - "https://.s3.yandexcloud.net/key", - "https://s3.yandexcloud.net/key", - "https://jokserfn.s3yandexcloud.net/key", - "https://s3.yandexcloud.net//", - "https://yandexcloud.net/", - "https://yandexcloud.net//", - "https://yandexcloud.net//key")); + "https://.s3.amazonaws.com/key", + "https://s3.amazonaws.com/key", + "https://jokserfn.s3amazonaws.com/key", + "https://s3.amazonaws.com//", + "https://amazonaws.com/", + "https://amazonaws.com//", + "https://amazonaws.com//key")); } diff --git a/src/Interpreters/OptimizeIfWithConstantConditionVisitor.cpp b/src/Interpreters/OptimizeIfWithConstantConditionVisitor.cpp index 802bf4e43ce..28b19f6670d 100644 --- a/src/Interpreters/OptimizeIfWithConstantConditionVisitor.cpp +++ b/src/Interpreters/OptimizeIfWithConstantConditionVisitor.cpp @@ -28,7 +28,7 @@ static bool tryExtractConstValueFromCondition(const ASTPtr & 
condition, bool & v } /// cast of numeric constant in condition to UInt8 - /// Note: this solution is ad-hoc and only implemented for yandex.metrica use case. + /// Note: this solution is ad-hoc and only implemented for metrica use case (one of the best customers). /// We should allow any constant condition (or maybe remove this optimization completely) later. if (const auto * function = condition->as()) { diff --git a/src/Parsers/ASTFunction.cpp b/src/Parsers/ASTFunction.cpp index f9a5c7be75f..b86929b054c 100644 --- a/src/Parsers/ASTFunction.cpp +++ b/src/Parsers/ASTFunction.cpp @@ -166,7 +166,7 @@ ASTPtr ASTFunction::toLiteral() const * Motivation: most people are unaware that _ is a metacharacter and forgot to properly escape it with two backslashes. * With highlighting we make it clearly obvious. * - * Another case is regexp match. Suppose the user types match(URL, 'www.yandex.ru'). It often means that the user is unaware that . is a metacharacter. + * Another case is regexp match. Suppose the user types match(URL, 'www.clickhouse.com'). It often means that the user is unaware that . is a metacharacter. */ static bool highlightStringLiteralWithMetacharacters(const ASTPtr & node, const IAST::FormatSettings & settings, const char * metacharacters) { diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp index cd07e304a39..29c7846283e 100644 --- a/src/Parsers/ExpressionElementParsers.cpp +++ b/src/Parsers/ExpressionElementParsers.cpp @@ -1727,7 +1727,7 @@ const char * ParserAlias::restricted_keywords[] = "NOT", "OFFSET", "ON", - "ONLY", /// YQL synonym for ANTI. Note: YQL is the name of one of Yandex proprietary languages, completely unrelated to ClickHouse. + "ONLY", /// YQL's synonym for ANTI. Note: YQL is the name of one of proprietary languages, completely unrelated to ClickHouse. "ORDER", "PREWHERE", "RIGHT", diff --git a/src/Processors/Formats/Impl/JSONAsStringRowInputFormat.cpp b/src/Processors/Formats/Impl/JSONAsStringRowInputFormat.cpp index 9bf1682b77e..e31006ff0f6 100644 --- a/src/Processors/Formats/Impl/JSONAsStringRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONAsStringRowInputFormat.cpp @@ -52,6 +52,7 @@ void JSONAsRowInputFormat::readSuffix() { assertChar(']', *buf); skipWhitespaceIfAny(*buf); + data_in_square_brackets = false; } if (!buf->eof() && *buf->position() == ';') { diff --git a/src/Processors/Merges/Algorithms/tests/gtest_graphite.cpp b/src/Processors/Merges/Algorithms/tests/gtest_graphite.cpp index 1d739bf566a..9160ad6e0fa 100644 --- a/src/Processors/Merges/Algorithms/tests/gtest_graphite.cpp +++ b/src/Processors/Merges/Algorithms/tests/gtest_graphite.cpp @@ -149,7 +149,7 @@ TEST(GraphiteTest, testSelectPattern) using namespace std::literals; std::string - xml(R"END( + xml(R"END( \.sum$ @@ -210,7 +210,7 @@ TEST(GraphiteTest, testSelectPattern) - + )END"); // Retentions must be ordered by 'age' descending. @@ -370,7 +370,7 @@ TEST(GraphiteTest, testSelectPatternTyped) using namespace std::literals; std::string - xml(R"END( + xml(R"END( plain @@ -488,7 +488,7 @@ TEST(GraphiteTest, testSelectPatternTyped) - + )END"); // Retentions must be ordered by 'age' descending. diff --git a/src/Processors/Transforms/ExpressionTransform.h b/src/Processors/Transforms/ExpressionTransform.h index ea73c8fb1da..791c7d7ba73 100644 --- a/src/Processors/Transforms/ExpressionTransform.h +++ b/src/Processors/Transforms/ExpressionTransform.h @@ -12,7 +12,7 @@ class ActionsDAG; /** Executes a certain expression over the block. 
* The expression consists of column identifiers from the block, constants, common functions. - * For example: hits * 2 + 3, url LIKE '%yandex%' + * For example: hits * 2 + 3, url LIKE '%clickhouse%' * The expression processes each row independently of the others. */ class ExpressionTransform final : public ISimpleTransform diff --git a/src/Storages/examples/CMakeLists.txt b/src/Storages/examples/CMakeLists.txt index 103972a106f..ca85ca9d98f 100644 --- a/src/Storages/examples/CMakeLists.txt +++ b/src/Storages/examples/CMakeLists.txt @@ -1,6 +1,3 @@ -add_executable (remove_symlink_directory remove_symlink_directory.cpp) -target_link_libraries (remove_symlink_directory PRIVATE dbms) - add_executable (merge_selector merge_selector.cpp) target_link_libraries (merge_selector PRIVATE dbms) @@ -12,13 +9,3 @@ target_link_libraries (get_current_inserts_in_replicated PRIVATE dbms clickhouse add_executable (get_abandonable_lock_in_all_partitions get_abandonable_lock_in_all_partitions.cpp) target_link_libraries (get_abandonable_lock_in_all_partitions PRIVATE dbms clickhouse_common_config clickhouse_common_zookeeper) - -add_executable (transform_part_zk_nodes transform_part_zk_nodes.cpp) -target_link_libraries (transform_part_zk_nodes - PRIVATE - boost::program_options - clickhouse_common_config - clickhouse_common_zookeeper - dbms - string_utils -) diff --git a/src/Storages/examples/remove_symlink_directory.cpp b/src/Storages/examples/remove_symlink_directory.cpp deleted file mode 100644 index db436c0a608..00000000000 --- a/src/Storages/examples/remove_symlink_directory.cpp +++ /dev/null @@ -1,35 +0,0 @@ -#include -#include -#include -#include -#include - -namespace fs = std::filesystem; - -namespace DB -{ - namespace ErrorCodes - { - extern const int SYSTEM_ERROR; - } -} - -int main(int, char **) -try -{ - fs::path dir("./test_dir/"); - fs::create_directories(dir); - FS::createFile("./test_dir/file"); - - if (0 != symlink("./test_dir", "./test_link")) - DB::throwFromErrnoWithPath("Cannot create symlink", "./test_link", DB::ErrorCodes::SYSTEM_ERROR); - - fs::rename("./test_link", "./test_link2"); - fs::remove_all("./test_link2"); - return 0; -} -catch (...) -{ - std::cerr << DB::getCurrentExceptionMessage(false) << "\n"; - return 1; -} diff --git a/src/Storages/examples/transform_part_zk_nodes.cpp b/src/Storages/examples/transform_part_zk_nodes.cpp deleted file mode 100644 index 3cbcc76190e..00000000000 --- a/src/Storages/examples/transform_part_zk_nodes.cpp +++ /dev/null @@ -1,131 +0,0 @@ -#include -#include -#include -#include -#include - -#include - -#include -#include - - -int main(int argc, char ** argv) -try -{ - boost::program_options::options_description desc("Allowed options"); - desc.add_options() - ("help,h", "produce help message") - ("address,a", boost::program_options::value()->required(), - "addresses of ZooKeeper instances, comma separated. Example: example01e.yandex.ru:2181") - ("path,p", boost::program_options::value()->required(), - "where to start") - ; - - boost::program_options::variables_map options; - boost::program_options::store(boost::program_options::parse_command_line(argc, argv, desc), options); - - if (options.count("help")) - { - std::cout << "Transform contents of part nodes in ZooKeeper to more compact storage scheme." 
<< std::endl; - std::cout << "Usage: " << argv[0] << " [options]" << std::endl; - std::cout << desc << std::endl; - return 1; - } - - zkutil::ZooKeeper zookeeper(options.at("address").as()); - - std::string initial_path = options.at("path").as(); - - struct Node - { - Node( - std::string path_, - std::future get_future_, - std::future children_future_, - Node * parent_) - : path(std::move(path_)) - , get_future(std::move(get_future_)) - , children_future(std::move(children_future_)) - , parent(parent_) - { - } - - std::string path; - std::future get_future; - std::future children_future; - - Node * parent = nullptr; - std::future set_future; - }; - - std::list nodes_queue; - nodes_queue.emplace_back( - initial_path, zookeeper.asyncGet(initial_path), zookeeper.asyncGetChildren(initial_path), nullptr); - - for (auto it = nodes_queue.begin(); it != nodes_queue.end(); ++it) - { - Coordination::GetResponse get_response; - Coordination::ListResponse children_response; - try - { - get_response = it->get_future.get(); - children_response = it->children_future.get(); - } - catch (const Coordination::Exception & e) - { - if (e.code == Coordination::Error::ZNONODE) - continue; - throw; - } - - if (get_response.stat.ephemeralOwner) - continue; - - if (it->path.find("/parts/") != std::string::npos - && !endsWith(it->path, "/columns") - && !endsWith(it->path, "/checksums")) - { - /// The node is related to part. - - /// If it is the part in old format (the node contains children) - convert it to the new format. - if (!children_response.names.empty()) - { - auto part_header = DB::ReplicatedMergeTreePartHeader::fromColumnsAndChecksumsZNodes( - zookeeper.get(it->path + "/columns"), zookeeper.get(it->path + "/checksums")); - - Coordination::Requests ops; - ops.emplace_back(zkutil::makeRemoveRequest(it->path + "/columns", -1)); - ops.emplace_back(zkutil::makeRemoveRequest(it->path + "/checksums", -1)); - ops.emplace_back(zkutil::makeSetRequest(it->path, part_header.toString(), -1)); - - it->set_future = zookeeper.asyncMulti(ops); - } - } - else - { - /// Recursively add children to the queue. - for (const auto & name : children_response.names) - { - std::string child_path = it->path == "/" ? it->path + name : it->path + '/' + name; - nodes_queue.emplace_back( - child_path, zookeeper.asyncGet(child_path), zookeeper.asyncGetChildren(child_path), - &(*it)); - } - } - } - - for (auto & node : nodes_queue) - { - if (node.set_future.valid()) - { - node.set_future.get(); - std::cerr << node.path << " changed!" << std::endl; - } - } -} -catch (...) 
-{ - std::cerr << DB::getCurrentExceptionMessage(true) << '\n'; - throw; -} From e128d89957816877944e37531a4e9acc0ed477b5 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 16 Apr 2022 00:24:58 +0200 Subject: [PATCH 077/110] Remove Arcadia --- src/Disks/DiskWebServer.h | 2 +- .../clickhouse-diagnostics | 2 +- utils/graphite-rollup/rollup-tag-list.xml | 4 +- utils/graphite-rollup/rollup-typed.xml | 4 +- utils/graphite-rollup/rollup.xml | 4 +- utils/release/push_packages | 250 ------------------ utils/s3tools/s3uploader | 4 +- .../main.cpp | 2 +- .../main.cpp | 2 +- utils/zookeeper-dump-tree/main.cpp | 2 +- utils/zookeeper-remove-by-list/main.cpp | 2 +- 11 files changed, 14 insertions(+), 264 deletions(-) delete mode 100755 utils/release/push_packages diff --git a/src/Disks/DiskWebServer.h b/src/Disks/DiskWebServer.h index 6341b582174..98f92fe5986 100644 --- a/src/Disks/DiskWebServer.h +++ b/src/Disks/DiskWebServer.h @@ -20,7 +20,7 @@ namespace ErrorCodes * * * web - * https://clickhouse-datasets.s3.yandex.net/disk-with-static-files-tests/test-hits/ + * https://clickhouse-datasets.s3.amazonaws.com/disk-with-static-files-tests/test-hits/ * * * diff --git a/utils/clickhouse-diagnostics/clickhouse-diagnostics b/utils/clickhouse-diagnostics/clickhouse-diagnostics index 2fe67071c3c..cf65e4efbfb 100755 --- a/utils/clickhouse-diagnostics/clickhouse-diagnostics +++ b/utils/clickhouse-diagnostics/clickhouse-diagnostics @@ -665,7 +665,7 @@ class DiagnosticsData: def _dump_wiki(self): """ - Dump diagnostic data in Yandex wiki format. + Dump diagnostic data in Wiki format. """ def _write_title(buffer, value): diff --git a/utils/graphite-rollup/rollup-tag-list.xml b/utils/graphite-rollup/rollup-tag-list.xml index ef28f2089ad..edab2f16436 100644 --- a/utils/graphite-rollup/rollup-tag-list.xml +++ b/utils/graphite-rollup/rollup-tag-list.xml @@ -1,4 +1,4 @@ - + plain @@ -164,4 +164,4 @@ - + diff --git a/utils/graphite-rollup/rollup-typed.xml b/utils/graphite-rollup/rollup-typed.xml index 0b27d43ece9..ace439dba4a 100644 --- a/utils/graphite-rollup/rollup-typed.xml +++ b/utils/graphite-rollup/rollup-typed.xml @@ -1,4 +1,4 @@ - + plain @@ -164,4 +164,4 @@ - + diff --git a/utils/graphite-rollup/rollup.xml b/utils/graphite-rollup/rollup.xml index 641b0130509..2089605c8bf 100644 --- a/utils/graphite-rollup/rollup.xml +++ b/utils/graphite-rollup/rollup.xml @@ -1,4 +1,4 @@ - + \.sum$ @@ -144,4 +144,4 @@ - + diff --git a/utils/release/push_packages b/utils/release/push_packages deleted file mode 100755 index e25cb325c71..00000000000 --- a/utils/release/push_packages +++ /dev/null @@ -1,250 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -import argparse -import subprocess -import os -import logging -import shutil -import base64 -import pexpect - - -# Do nothing if keys are not provided -class GpgKey(object): - gnupg_dir = os.path.expanduser('~/.gnupg') - TEMPGNUPG_DIR = os.path.expanduser('~/.local/tempgnupg') - - def __init__(self, secret_key_path, public_key_path): - if secret_key_path and public_key_path: - with open(secret_key_path, 'r') as sec, open(public_key_path, 'r') as pub: - self._secret_key = sec.read() - self._public_key = pub.read() - else: - self._secret_key = None - self._public_key = None - - def __enter__(self): - if self._secret_key and self._public_key: - if os.path.exists(self.gnupg_dir): - shutil.move(self.gnupg_dir, self.TEMPGNUPG_DIR) - os.mkdir(self.gnupg_dir) - open(os.path.join(self.gnupg_dir, 'secring.gpg'), 'wb').write(base64.b64decode(self._secret_key)) - 
open(os.path.join(self.gnupg_dir, 'pubring.gpg'), 'wb').write(base64.b64decode(self._public_key)) - return self - - def __exit__(self, exc_type, exc_val, exc_tb): - if self._secret_key and self._public_key: - shutil.rmtree(self.gnupg_dir) - if os.path.exists(self.TEMPGNUPG_DIR): - shutil.move(self.TEMPGNUPG_DIR, self.gnupg_dir) - - -class DebRelease(object): - - DUPLOAD_CONF_TEMPLATE = '\n\t'.join(( - "$cfg{{'{title}'}} = {{", - 'fqdn => "{fqdn}",', - 'method => "{method}",', - 'login => "{login}",', - 'incoming => "{incoming}",', - 'options => "{options}",', - 'dinstall_runs => {dinstall_runs},\n}};',)) - DUPLOAD_CONF_PATH = os.path.expanduser('~/.dupload.conf') - DUPLOAD_CONF_TMP_PATH = os.path.expanduser('~/.local/tmp_dupload.cnf') - - def __init__(self, dupload_config, login, ssh_key_path): - self.__config = {} - for repo, conf in dupload_config.items(): - d = { - "fqdn": conf["fqdn"], - "method": "scpb", - "login": login, - "incoming": conf["incoming"], - "dinstall_runs": 0, - "options": "-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o ConnectionAttempts=3", - } - d.update(conf) - self.__config[repo] = d - print(self.__config) - self.ssh_key_path = ssh_key_path - - def __enter__(self): - if os.path.exists(self.DUPLOAD_CONF_PATH): - shutil.move(self.DUPLOAD_CONF_PATH, self.DUPLOAD_CONF_TMP_PATH) - self.__dupload_conf = open(self.DUPLOAD_CONF_PATH, 'w') - self.__dupload_conf.write('package config;\n\n$default_host = undef;\n\n' + '\n\n'.join([ - self.DUPLOAD_CONF_TEMPLATE.format(title=title, **values) - for title, values in self.__config.items()])) - self.__dupload_conf.write('\n') - self.__dupload_conf.close() - if self.ssh_key_path: - subprocess.check_call("ssh-add {}".format(self.ssh_key_path), shell=True) - return self - - def __exit__(self, exc_type, exc_val, exc_tb): - if os.path.exists(self.DUPLOAD_CONF_TMP_PATH): - shutil.move(self.DUPLOAD_CONF_TMP_PATH, self.DUPLOAD_CONF_PATH) - else: - os.unlink(self.DUPLOAD_CONF_PATH) - - -class SSHConnection(object): - def __init__(self, user, host, ssh_key=None): - if ssh_key: - key_str = "-i {}".format(ssh_key) - else: - key_str = "" - - self.base_cmd = "ssh {key} {user}@{host}".format( - key=key_str, user=user, host=host) - - def execute(self, cmd): - logging.info("Executing remote cmd %s", cmd) - subprocess.check_call(self.base_cmd + ' "{cmd}"'.format(cmd=cmd), - shell=True) - - -def debsign(path, gpg_passphrase, gpg_sec_key_path, gpg_pub_key_path, gpg_user): - try: - with GpgKey(gpg_sec_key_path, gpg_pub_key_path): - cmd = ('debsign -k \'{key}\' -p"gpg --verbose --no-use-agent --batch ' - '--no-tty --passphrase {passphrase}" {path}/*.changes').format( - key=gpg_user, passphrase=gpg_passphrase, path=path) - logging.info("Build debsign cmd '%s'", cmd) - subprocess.check_call(cmd, shell=True) - logging.info("debsign finished") - except Exception as ex: - logging.error("Cannot debsign packages on path %s, with user key", path) - raise ex - -def rpmsign(path, gpg_passphrase, gpg_sec_key_path, gpg_pub_key_path, gpg_user): - try: - with GpgKey(gpg_sec_key_path, gpg_pub_key_path): - for package in os.listdir(path): - package_path = os.path.join(path, package) - logging.info("Signing %s", package_path) - proc = pexpect.spawn('rpm --resign -D "_signature gpg" -D "_gpg_name {username}" {package}'.format(username=gpg_user, package=package_path)) - proc.expect_exact("Enter pass phrase: ") - proc.sendline(gpg_passphrase) - proc.expect(pexpect.EOF) - logging.info("Signed successfully") - except Exception as ex: - logging.error("Cannot 
rpmsign packages on path %s, with user key", path) - raise ex - -def transfer_packages_scp(ssh_key, path, repo_user, repo_url, incoming_directory): - logging.info("Transferring packages via scp to %s", repo_url) - if ssh_key: - key_str = "-i {}".format(ssh_key) - else: - key_str = "" - subprocess.check_call('scp {key_str} {path}/* {user}@{repo}:{incoming}'.format( - path=path, user=repo_user, repo=repo_url, key_str=key_str, incoming=incoming_directory), shell=True) - logging.info("Transfer via scp finished") - -def transfer_packages_dupload(ssh_key, path, repo_user, repo_url, incoming_directory): - repo_short_name = repo_url.split('.')[0] - config = { - repo_short_name: { - "fqdn": repo_url, - "incoming": incoming_directory, - } - } - with DebRelease(config, repo_user, ssh_key): - logging.info("Duploading") - subprocess.check_call("dupload -f --nomail --to {repo} {path}".format(repo=repo_short_name, path=path), shell=True) - logging.info("Dupload finished") - - -def clear_old_incoming_packages(ssh_connection, user): - for pkg in ('deb', 'rpm', 'tgz'): - for release_type in ('stable', 'testing', 'prestable', 'lts'): - try: - ssh_connection.execute("rm /home/{user}/incoming/clickhouse/{pkg}/{release_type}/*".format( - user=user, pkg=pkg, release_type=release_type)) - except Exception: - logging.info("rm is not required") - - -def _get_incoming_path(repo_url, user=None, pkg_type=None, release_type=None): - if repo_url == 'repo.mirror.yandex.net': - return "/home/{user}/incoming/clickhouse/{pkg}/{release_type}".format( - user=user, pkg=pkg_type, release_type=release_type) - else: - return "/repo/{0}/mini-dinstall/incoming/".format(repo_url.split('.')[0]) - - -def _fix_args(args): - - if args.gpg_sec_key_path and not os.path.isabs(args.gpg_sec_key_path): - args.gpg_sec_key_path = os.path.join(os.getcwd(), args.gpg_sec_key_path) - - if args.gpg_pub_key_path and not os.path.isabs(args.gpg_pub_key_path): - args.gpg_pub_key_path = os.path.join(os.getcwd(), args.gpg_pub_key_path) - - if args.ssh_key_path and not os.path.isabs(args.ssh_key_path): - args.ssh_key_path = os.path.join(os.getcwd(), args.ssh_key_path) - - -if __name__ == "__main__": - logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s') - parser = argparse.ArgumentParser(description="Program to push clickhouse packages to repository") - parser.add_argument('--deb-directory') - parser.add_argument('--rpm-directory') - parser.add_argument('--tgz-directory') - parser.add_argument('--release-type', choices=('testing', 'stable', 'prestable', 'lts'), default='testing') - parser.add_argument('--ssh-key-path') - parser.add_argument('--gpg-passphrase', required=True) - parser.add_argument('--gpg-sec-key-path') - parser.add_argument('--gpg-pub-key-path') - parser.add_argument('--gpg-key-user', default='robot-clickhouse') - parser.add_argument('--repo-url', default='repo.mirror.yandex.net') - parser.add_argument('--repo-user', default='buildfarm') - - args = parser.parse_args() - if args.deb_directory is None and args.rpm_directory is None and args.tgz_directory is None: - parser.error('At least one package directory required') - - _fix_args(args) - - is_open_source = args.repo_url == 'repo.mirror.yandex.net' - ssh_connection = SSHConnection(args.repo_user, args.repo_url, args.ssh_key_path) - - packages = [] - if args.deb_directory: - debsign(args.deb_directory, args.gpg_passphrase, args.gpg_sec_key_path, args.gpg_pub_key_path, args.gpg_key_user) - packages.append((args.deb_directory, 'deb')) - - if args.rpm_directory: - if not 
is_open_source: - raise Exception("Cannot upload .rpm package to {}".format(args.repo_url)) - rpmsign(args.rpm_directory, args.gpg_passphrase, args.gpg_sec_key_path, args.gpg_pub_key_path, args.gpg_key_user) - packages.append((args.rpm_directory, 'rpm')) - - if args.tgz_directory: - if not is_open_source: - raise Exception("Cannot upload .tgz package to {}".format(args.repo_url)) - packages.append((args.tgz_directory, 'tgz')) - - if is_open_source: - logging.info("Clearing old directory with incoming packages on buildfarm") - clear_old_incoming_packages(ssh_connection, args.repo_user) - logging.info("Incoming directory cleared") - - for package_path, package_type in packages: - logging.info("Processing path '%s' with package type %s", package_path, package_type) - incoming_directory = _get_incoming_path(args.repo_url, args.repo_user, package_type, args.release_type) - if package_type == "deb": - transfer_packages_dupload(args.ssh_key_path, package_path, args.repo_user, args.repo_url, incoming_directory) - else: - transfer_packages_scp(args.ssh_key_path, package_path, args.repo_user, args.repo_url, incoming_directory) - - logging.info("Running clickhouse install (it takes about (20-30 minutes)") - ssh_connection.execute("sudo /usr/sbin/ya-clickhouse-{0}-install".format(package_type)) - logging.info("Clickhouse installed") - logging.info("Pushing clickhouse to repo") - ssh_connection.execute("/usr/sbin/push2publicrepo.sh clickhouse") - logging.info("Push finished") - logging.info("Package '%s' pushed", package_type) - else: - transfer_packages_dupload(args.ssh_key_path, args.deb_directory, args.repo_user, args.repo_url, _get_incoming_path(args.repo_url)) diff --git a/utils/s3tools/s3uploader b/utils/s3tools/s3uploader index 0627b19ea8f..33db76f57f4 100755 --- a/utils/s3tools/s3uploader +++ b/utils/s3tools/s3uploader @@ -121,8 +121,8 @@ if __name__ == "__main__": parser = argparse.ArgumentParser( description="Simple tool for uploading datasets to clickhouse S3", usage='%(prog)s [options] {}'.format(USAGE_EXAMPLES)) - parser.add_argument('--s3-api-url', default='s3.mds.yandex.net') - parser.add_argument('--s3-common-url', default='s3.yandex.net') + parser.add_argument('--s3-api-url', default='s3.amazonaws.com') + parser.add_argument('--s3-common-url', default='s3.amazonaws.com') parser.add_argument('--bucket-name', default='clickhouse-datasets') parser.add_argument('--dataset-name', required=True, help='Name of dataset, will be used in uploaded path') diff --git a/utils/zookeeper-adjust-block-numbers-to-parts/main.cpp b/utils/zookeeper-adjust-block-numbers-to-parts/main.cpp index 8550675cb9e..5c694ee04ef 100644 --- a/utils/zookeeper-adjust-block-numbers-to-parts/main.cpp +++ b/utils/zookeeper-adjust-block-numbers-to-parts/main.cpp @@ -214,7 +214,7 @@ try po::options_description desc("Allowed options"); desc.add_options() ("help,h", "show help") - ("zookeeper,z", po::value(), "Addresses of ZooKeeper instances, comma-separated. Example: example01e.yandex.ru:2181") + ("zookeeper,z", po::value(), "Addresses of ZooKeeper instances, comma-separated. Example: example01e.clickhouse.com:2181") ("path,p", po::value(), "[optional] Path of replica queue to insert node (without trailing slash). By default it's /clickhouse/tables") ("shard,s", po::value(), "[optional] Shards to process, comma-separated. If not specified then the utility will process all the shards.") ("table,t", po::value(), "[optional] Tables to process, comma-separated. 
If not specified then the utility will process all the tables.") diff --git a/utils/zookeeper-create-entry-to-download-part/main.cpp b/utils/zookeeper-create-entry-to-download-part/main.cpp index 1e86fe248ab..b92857929b7 100644 --- a/utils/zookeeper-create-entry-to-download-part/main.cpp +++ b/utils/zookeeper-create-entry-to-download-part/main.cpp @@ -11,7 +11,7 @@ try desc.add_options() ("help,h", "produce help message") ("address,a", boost::program_options::value()->required(), - "addresses of ZooKeeper instances, comma separated. Example: example01e.yandex.ru:2181") + "addresses of ZooKeeper instances, comma separated. Example: example01e.clickhouse.com:2181") ("path,p", boost::program_options::value()->required(), "path of replica queue to insert node (without trailing slash)") ("name,n", boost::program_options::value()->required(), "name of part to download") ; diff --git a/utils/zookeeper-dump-tree/main.cpp b/utils/zookeeper-dump-tree/main.cpp index 893056564bb..d85762df640 100644 --- a/utils/zookeeper-dump-tree/main.cpp +++ b/utils/zookeeper-dump-tree/main.cpp @@ -14,7 +14,7 @@ int main(int argc, char ** argv) desc.add_options() ("help,h", "produce help message") ("address,a", boost::program_options::value()->required(), - "addresses of ZooKeeper instances, comma separated. Example: example01e.yandex.ru:2181") + "addresses of ZooKeeper instances, comma separated. Example: example01e.clickhouse.com:2181") ("path,p", boost::program_options::value()->default_value("/"), "where to start") ("ctime,c", "print node ctime") diff --git a/utils/zookeeper-remove-by-list/main.cpp b/utils/zookeeper-remove-by-list/main.cpp index 2c97ffb4a70..cf194bd4861 100644 --- a/utils/zookeeper-remove-by-list/main.cpp +++ b/utils/zookeeper-remove-by-list/main.cpp @@ -12,7 +12,7 @@ try desc.add_options() ("help,h", "produce help message") ("address,a", boost::program_options::value()->required(), - "addresses of ZooKeeper instances, comma separated. Example: example01e.yandex.ru:2181") + "addresses of ZooKeeper instances, comma separated. Example: example01e.clickhouse.com:2181") ; boost::program_options::variables_map options; From ccdd0a60f74e5069efd2c73c332ae281f6c374aa Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 16 Apr 2022 00:28:56 +0200 Subject: [PATCH 078/110] Remove Arcadia --- base/base/phdr_cache.cpp | 2 +- base/base/phdr_cache.h | 2 +- base/daemon/BaseDaemon.h | 8 ++++---- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/base/base/phdr_cache.cpp b/base/base/phdr_cache.cpp index 20a755ed7a4..36a9b4f1f62 100644 --- a/base/base/phdr_cache.cpp +++ b/base/base/phdr_cache.cpp @@ -2,7 +2,7 @@ #pragma clang diagnostic ignored "-Wreserved-identifier" #endif -/// This code was based on the code by Fedor Korotkiy (prime@yandex-team.ru) for YT product in Yandex. +/// This code was based on the code by Fedor Korotkiy https://www.linkedin.com/in/fedor-korotkiy-659a1838/ #include diff --git a/base/base/phdr_cache.h b/base/base/phdr_cache.h index d2854ece0bc..b522710c4c4 100644 --- a/base/base/phdr_cache.h +++ b/base/base/phdr_cache.h @@ -1,6 +1,6 @@ #pragma once -/// This code was based on the code by Fedor Korotkiy (prime@yandex-team.ru) for YT product in Yandex. +/// This code was based on the code by Fedor Korotkiy https://www.linkedin.com/in/fedor-korotkiy-659a1838/ /** Collects all dl_phdr_info items and caches them in a static array. 
* Also rewrites dl_iterate_phdr with a lock-free version which consults the above cache diff --git a/base/daemon/BaseDaemon.h b/base/daemon/BaseDaemon.h index 54a74369dce..152a431922c 100644 --- a/base/daemon/BaseDaemon.h +++ b/base/daemon/BaseDaemon.h @@ -76,10 +76,10 @@ public: /// return none if daemon doesn't exist, reference to the daemon otherwise static std::optional> tryGetInstance() { return tryGetInstance(); } - /// В Graphite компоненты пути(папки) разделяются точкой. - /// У нас принят путь формата root_path.hostname_yandex_ru.key - /// root_path по умолчанию one_min - /// key - лучше группировать по смыслу. Например "meminfo.cached" или "meminfo.free", "meminfo.total" + /// Graphite metric name has components separated by dots. + /// We used to have the following format: root_path.hostname_clickhouse_com.key + /// root_path - one_min by default + /// key - something that makes sense. Examples: "meminfo.cached" or "meminfo.free", "meminfo.total". template void writeToGraphite(const std::string & key, const T & value, const std::string & config_name = DEFAULT_GRAPHITE_CONFIG_NAME, time_t timestamp = 0, const std::string & custom_root_path = "") { From 40357637ec356ec7e65145424a48743b19c1a4ca Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 16 Apr 2022 00:33:32 +0200 Subject: [PATCH 079/110] Remove Arcadia --- src/Common/tests/gtest_DateLUTImpl.cpp | 5 +- .../gtest_DateTime64_parsing_and_writing.cpp | 122 ------------------ utils/check-style/check-style | 5 - 3 files changed, 2 insertions(+), 130 deletions(-) diff --git a/src/Common/tests/gtest_DateLUTImpl.cpp b/src/Common/tests/gtest_DateLUTImpl.cpp index 1eec83a6ec9..be96c190be3 100644 --- a/src/Common/tests/gtest_DateLUTImpl.cpp +++ b/src/Common/tests/gtest_DateLUTImpl.cpp @@ -90,10 +90,10 @@ TEST(DateLUTTest, makeDayNumTest) TEST(DateLUTTest, TimeValuesInMiddleOfRange) { - const DateLUTImpl & lut = DateLUT::instance("Europe/Minsk"); + const DateLUTImpl & lut = DateLUT::instance("Asia/Istanbul"); const time_t time = 1568650811; // 2019-09-16 19:20:11 (Monday) - EXPECT_EQ(lut.getTimeZone(), "Europe/Minsk"); + EXPECT_EQ(lut.getTimeZone(), "Asia/Istanbul"); EXPECT_EQ(lut.getOffsetAtStartOfEpoch(), 3600*3); // UTC-3 EXPECT_EQ(lut.toDate(time), 1568581200); @@ -538,4 +538,3 @@ INSTANTIATE_TEST_SUITE_P(AllTimezones_Year1970, // {0, 0 + 11 * 3600 * 24 + 12, 11}, })) ); - diff --git a/src/IO/tests/gtest_DateTime64_parsing_and_writing.cpp b/src/IO/tests/gtest_DateTime64_parsing_and_writing.cpp index c6208af2d5e..b2fd78fa8c2 100644 --- a/src/IO/tests/gtest_DateTime64_parsing_and_writing.cpp +++ b/src/IO/tests/gtest_DateTime64_parsing_and_writing.cpp @@ -76,125 +76,3 @@ TEST_P(DateTime64StringParseBestEffortTest, parse) EXPECT_EQ(param.dt64, actual); } - - -// YYYY-MM-DD HH:MM:SS.NNNNNNNNN -INSTANTIATE_TEST_SUITE_P(Basic, - DateTime64StringParseTest, - ::testing::ValuesIn(std::initializer_list{ - { - "When subsecond part is missing from string it is set to zero.", - "2019-09-16 19:20:17", - 1568650817'000, - 3, - DateLUT::instance("Europe/Minsk") - }, - { - "When subsecond part is present in string, but it is zero, it is set to zero.", - "2019-09-16 19:20:17.0", - 1568650817'000, - 3, - DateLUT::instance("Europe/Minsk") - }, - { - "When scale is 0, subsecond part is not set.", - "2019-09-16 19:20:17", - 1568650817ULL, - 0, - DateLUT::instance("Europe/Minsk") - }, - { - "When scale is 0, subsecond part is 0 despite being present in string.", - "2019-09-16 19:20:17.123", - 1568650817ULL, - 0, - DateLUT::instance("Europe/Minsk") - 
}, - { - "When subsecond part is present in string, it is set correctly to DateTime64 value of scale 3.", - "2019-09-16 19:20:17.123", - 1568650817'123, - 3, - DateLUT::instance("Europe/Minsk") - }, - { - "When subsecond part is present in string (and begins with 0), it is set correctly to DateTime64 value of scale 3.", - "2019-09-16 19:20:17.012", - 1568650817'012, - 3, - DateLUT::instance("Europe/Minsk") - }, - { - "When subsecond part scale is smaller than DateTime64 scale, subsecond part is properly adjusted (as if padded from right with zeroes).", - "2019-09-16 19:20:17.123", - 1568650817'12300ULL, - 5, - DateLUT::instance("Europe/Minsk") - }, - { - "When subsecond part scale is larger than DateTime64 scale, subsecond part is truncated.", - "2019-09-16 19:20:17.123", - 1568650817'1ULL, - 1, - DateLUT::instance("Europe/Minsk") - } - }) -); - -INSTANTIATE_TEST_SUITE_P(BestEffort, - DateTime64StringParseBestEffortTest, - ::testing::ValuesIn(std::initializer_list{ - { - "When subsecond part is unreasonably large, it truncated to given scale", - "2019-09-16 19:20:17.12345678910111213141516171819202122233435363738393031323334353637383940414243444546474849505152535455565758596061626364", - 1568650817'123456ULL, - 6, - DateLUT::instance("Europe/Minsk") - } - }) -); - - -// TODO: add negative test cases for invalid strings, verifying that error is reported properly - -INSTANTIATE_TEST_SUITE_P(Basic, - DateTime64StringWriteTest, - ::testing::ValuesIn(std::initializer_list{ - { - "non-zero subsecond part on DateTime64 with scale of 3", - "2019-09-16 19:20:17.123", - 1568650817'123, - 3, - DateLUT::instance("Europe/Minsk") - }, - { - "non-zero subsecond part on DateTime64 with scale of 5", - "2019-09-16 19:20:17.12345", - 1568650817'12345ULL, - 5, - DateLUT::instance("Europe/Minsk") - }, - { - "Zero subsecond part is written to string", - "2019-09-16 19:20:17.000", - 1568650817'000ULL, - 3, - DateLUT::instance("Europe/Minsk") - }, - { - "When scale is 0, subsecond part (and separtor) is missing from string", - "2019-09-16 19:20:17", - 1568650817ULL, - 0, - DateLUT::instance("Europe/Minsk") - }, - { - "Subsecond part with leading zeroes is written to string correctly", - "2019-09-16 19:20:17.001", - 1568650817'001ULL, - 3, - DateLUT::instance("Europe/Minsk") - } - }) -); - diff --git a/utils/check-style/check-style b/utils/check-style/check-style index 6ebf53cb932..20954781fbd 100755 --- a/utils/check-style/check-style +++ b/utils/check-style/check-style @@ -147,11 +147,6 @@ find $ROOT_PATH -not -path $ROOT_PATH'/contrib*' \( -name '*.yaml' -or -name '*. grep -vP $EXCLUDE_DIRS | xargs yamllint --config-file=$ROOT_PATH/.yamllint -# Machine translation to Russian is strictly prohibited -find $ROOT_PATH/docs/ru -name '*.md' | - grep -vP $EXCLUDE_DIRS | - xargs grep -l -F 'machine_translated: true' - # Tests should not be named with "fail" in their names. It makes looking at the results less convenient. 
find $ROOT_PATH/tests/queries -iname '*fail*' | grep -vP $EXCLUDE_DIRS | From 69b91c5410dc5d1153be96b2985a5b78462028e4 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 16 Apr 2022 02:08:06 +0200 Subject: [PATCH 080/110] Simplification #36313 --- src/Common/getNumberOfPhysicalCPUCores.cpp | 34 +++++++--------------- 1 file changed, 10 insertions(+), 24 deletions(-) diff --git a/src/Common/getNumberOfPhysicalCPUCores.cpp b/src/Common/getNumberOfPhysicalCPUCores.cpp index f415ee58e85..0df7a83cd53 100644 --- a/src/Common/getNumberOfPhysicalCPUCores.cpp +++ b/src/Common/getNumberOfPhysicalCPUCores.cpp @@ -5,9 +5,6 @@ # include # include #endif -#if USE_CPUID -# include -#endif #include @@ -41,32 +38,21 @@ static unsigned getCGroupLimitedCPUCores(unsigned default_cpu_count) static unsigned getNumberOfPhysicalCPUCoresImpl() { - unsigned cpu_count = 0; // start with an invalid num + unsigned cpu_count = std::thread::hardware_concurrency(); -#if USE_CPUID - cpu_raw_data_t raw_data; - cpu_id_t data; + /// Most of x86_64 CPUs have 2-way Hyper-Threading + /// Aarch64 and RISC-V don't have SMT so far. + /// POWER has SMT and it can be multiple way (like 8-way), but we don't know how ClickHouse really behaves, so use all of them. - /// On Xen VMs, libcpuid returns wrong info (zero number of cores). Fallback to alternative method. - /// Also, libcpuid does not support some CPUs like AMD Hygon C86 7151. - /// Also, libcpuid gives strange result on Google Compute Engine VMs. - /// Example: - /// num_cores = 12, /// number of physical cores on current CPU socket - /// total_logical_cpus = 1, /// total number of logical cores on all sockets - /// num_logical_cpus = 24. /// number of logical cores on current CPU socket - /// It means two-way hyper-threading (24 / 12), but contradictory, 'total_logical_cpus' == 1. - - if (0 == cpuid_get_raw_data(&raw_data) && 0 == cpu_identify(&raw_data, &data) && data.num_logical_cpus != 0) - cpu_count = data.num_cores * data.total_logical_cpus / data.num_logical_cpus; +#if defined(__x86_64__) + /// Let's limit ourself to the number of physical cores. + /// But if the number of logical cores is small - maybe it is a small machine + /// or very limited cloud instance and it is reasonable to use all the cores. + if (cpu_count >= 8) + cpu_count /= 2; #endif - /// As a fallback (also for non-x86 architectures) assume there are no hyper-threading on the system. - /// (Actually, only Aarch64 is supported). 
- if (cpu_count == 0) - cpu_count = std::thread::hardware_concurrency(); - #if defined(OS_LINUX) - /// TODO: add a setting for disabling that, similar to UseContainerSupport in java cpu_count = getCGroupLimitedCPUCores(cpu_count); #endif From 33ffdcaa4f21f6611eeea8b72c0cf71971baf86d Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 16 Apr 2022 02:18:22 +0200 Subject: [PATCH 081/110] Return two changes --- src/Common/tests/gtest_DateLUTImpl.cpp | 5 +- .../gtest_DateTime64_parsing_and_writing.cpp | 122 ++++++++++++++++++ 2 files changed, 125 insertions(+), 2 deletions(-) diff --git a/src/Common/tests/gtest_DateLUTImpl.cpp b/src/Common/tests/gtest_DateLUTImpl.cpp index be96c190be3..1eec83a6ec9 100644 --- a/src/Common/tests/gtest_DateLUTImpl.cpp +++ b/src/Common/tests/gtest_DateLUTImpl.cpp @@ -90,10 +90,10 @@ TEST(DateLUTTest, makeDayNumTest) TEST(DateLUTTest, TimeValuesInMiddleOfRange) { - const DateLUTImpl & lut = DateLUT::instance("Asia/Istanbul"); + const DateLUTImpl & lut = DateLUT::instance("Europe/Minsk"); const time_t time = 1568650811; // 2019-09-16 19:20:11 (Monday) - EXPECT_EQ(lut.getTimeZone(), "Asia/Istanbul"); + EXPECT_EQ(lut.getTimeZone(), "Europe/Minsk"); EXPECT_EQ(lut.getOffsetAtStartOfEpoch(), 3600*3); // UTC-3 EXPECT_EQ(lut.toDate(time), 1568581200); @@ -538,3 +538,4 @@ INSTANTIATE_TEST_SUITE_P(AllTimezones_Year1970, // {0, 0 + 11 * 3600 * 24 + 12, 11}, })) ); + diff --git a/src/IO/tests/gtest_DateTime64_parsing_and_writing.cpp b/src/IO/tests/gtest_DateTime64_parsing_and_writing.cpp index b2fd78fa8c2..c6208af2d5e 100644 --- a/src/IO/tests/gtest_DateTime64_parsing_and_writing.cpp +++ b/src/IO/tests/gtest_DateTime64_parsing_and_writing.cpp @@ -76,3 +76,125 @@ TEST_P(DateTime64StringParseBestEffortTest, parse) EXPECT_EQ(param.dt64, actual); } + + +// YYYY-MM-DD HH:MM:SS.NNNNNNNNN +INSTANTIATE_TEST_SUITE_P(Basic, + DateTime64StringParseTest, + ::testing::ValuesIn(std::initializer_list{ + { + "When subsecond part is missing from string it is set to zero.", + "2019-09-16 19:20:17", + 1568650817'000, + 3, + DateLUT::instance("Europe/Minsk") + }, + { + "When subsecond part is present in string, but it is zero, it is set to zero.", + "2019-09-16 19:20:17.0", + 1568650817'000, + 3, + DateLUT::instance("Europe/Minsk") + }, + { + "When scale is 0, subsecond part is not set.", + "2019-09-16 19:20:17", + 1568650817ULL, + 0, + DateLUT::instance("Europe/Minsk") + }, + { + "When scale is 0, subsecond part is 0 despite being present in string.", + "2019-09-16 19:20:17.123", + 1568650817ULL, + 0, + DateLUT::instance("Europe/Minsk") + }, + { + "When subsecond part is present in string, it is set correctly to DateTime64 value of scale 3.", + "2019-09-16 19:20:17.123", + 1568650817'123, + 3, + DateLUT::instance("Europe/Minsk") + }, + { + "When subsecond part is present in string (and begins with 0), it is set correctly to DateTime64 value of scale 3.", + "2019-09-16 19:20:17.012", + 1568650817'012, + 3, + DateLUT::instance("Europe/Minsk") + }, + { + "When subsecond part scale is smaller than DateTime64 scale, subsecond part is properly adjusted (as if padded from right with zeroes).", + "2019-09-16 19:20:17.123", + 1568650817'12300ULL, + 5, + DateLUT::instance("Europe/Minsk") + }, + { + "When subsecond part scale is larger than DateTime64 scale, subsecond part is truncated.", + "2019-09-16 19:20:17.123", + 1568650817'1ULL, + 1, + DateLUT::instance("Europe/Minsk") + } + }) +); + +INSTANTIATE_TEST_SUITE_P(BestEffort, + DateTime64StringParseBestEffortTest, + 
::testing::ValuesIn(std::initializer_list{ + { + "When subsecond part is unreasonably large, it truncated to given scale", + "2019-09-16 19:20:17.12345678910111213141516171819202122233435363738393031323334353637383940414243444546474849505152535455565758596061626364", + 1568650817'123456ULL, + 6, + DateLUT::instance("Europe/Minsk") + } + }) +); + + +// TODO: add negative test cases for invalid strings, verifying that error is reported properly + +INSTANTIATE_TEST_SUITE_P(Basic, + DateTime64StringWriteTest, + ::testing::ValuesIn(std::initializer_list{ + { + "non-zero subsecond part on DateTime64 with scale of 3", + "2019-09-16 19:20:17.123", + 1568650817'123, + 3, + DateLUT::instance("Europe/Minsk") + }, + { + "non-zero subsecond part on DateTime64 with scale of 5", + "2019-09-16 19:20:17.12345", + 1568650817'12345ULL, + 5, + DateLUT::instance("Europe/Minsk") + }, + { + "Zero subsecond part is written to string", + "2019-09-16 19:20:17.000", + 1568650817'000ULL, + 3, + DateLUT::instance("Europe/Minsk") + }, + { + "When scale is 0, subsecond part (and separtor) is missing from string", + "2019-09-16 19:20:17", + 1568650817ULL, + 0, + DateLUT::instance("Europe/Minsk") + }, + { + "Subsecond part with leading zeroes is written to string correctly", + "2019-09-16 19:20:17.001", + 1568650817'001ULL, + 3, + DateLUT::instance("Europe/Minsk") + } + }) +); + From fa46e0d22fb28f62a7125f680c35b7a25edb8b9f Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 16 Apr 2022 02:54:21 +0200 Subject: [PATCH 082/110] Add an option for build profiling --- CMakeLists.txt | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index dad9a25ab26..4f1a6c05730 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -314,6 +314,15 @@ if (ENABLE_BUILD_PATH_MAPPING) set (CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} -ffile-prefix-map=${CMAKE_SOURCE_DIR}=.") endif () +option (ENABLE_BUILD_PROFILING "Enable profiling of build time" OFF) +if (ENABLE_BUILD_PROFILING) + if (COMPILER_CLANG) + set (COMPILER_FLAGS "${COMPILER_FLAGS} -ftime-trace") + else () + message (${RECONFIGURE_MESSAGE_LEVEL} "Build profiling is only available with CLang") + endif () +endif () + if (${CMAKE_VERSION} VERSION_LESS "3.12.4") # CMake < 3.12 doesn't support setting 20 as a C++ standard version. # We will add C++ standard controlling flag in CMAKE_CXX_FLAGS manually for now. 
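A minimal usage sketch for the ENABLE_BUILD_PROFILING option added in the patch above (a hedged example, not part of the patch: it assumes a Clang toolchain is selected and a generic build directory name; the JSON output naming comes from Clang's -ftime-trace itself):

    # Configure a build with per-translation-unit time tracing enabled
    cmake -S . -B build -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DENABLE_BUILD_PROFILING=ON
    cmake --build build
    # Clang writes a .json time-trace next to each object file; it can be opened
    # in chrome://tracing or a similar viewer to see where compile time is spent.
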
From 294efeccfe7532fe1c29052b7a908e245a0a76ec Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 16 Apr 2022 01:15:40 +0200 Subject: [PATCH 083/110] Fix clang-tidy-14 (part 1) --- contrib/sysroot | 2 +- src/Access/DiskAccessStorage.cpp | 3 ++- .../AggregateFunctionAvgWeighted.cpp | 4 ++-- src/AggregateFunctions/AggregateFunctionDeltaSum.cpp | 2 +- .../AggregateFunctionGroupArray.cpp | 2 +- .../AggregateFunctionGroupBitmap.cpp | 2 +- .../AggregateFunctionGroupUniqArray.cpp | 2 +- src/AggregateFunctions/AggregateFunctionQuantile.cpp | 4 ++-- src/AggregateFunctions/AggregateFunctionSparkbar.cpp | 2 +- .../AggregateFunctionStatisticsSimple.cpp | 2 +- src/AggregateFunctions/AggregateFunctionSum.cpp | 2 +- src/AggregateFunctions/AggregateFunctionSumCount.cpp | 2 +- src/AggregateFunctions/AggregateFunctionTopK.cpp | 2 +- src/Common/UTF8Helpers.cpp | 2 +- src/Dictionaries/IPAddressDictionary.cpp | 12 ++++++------ src/Functions/FunctionHelpers.cpp | 2 +- src/Functions/FunctionsLogical.cpp | 6 +++--- src/Functions/array/arrayElement.cpp | 2 +- src/Functions/if.cpp | 6 +++--- src/Functions/toLowCardinality.cpp | 1 - src/Interpreters/JIT/compileFunction.cpp | 2 +- src/Interpreters/convertFieldToType.cpp | 12 ++++++------ src/Parsers/Access/ASTShowAccessEntitiesQuery.cpp | 3 ++- src/Storages/FileLog/StorageFileLog.cpp | 2 +- .../MergeTree/MergeTreeDataSelectExecutor.cpp | 2 +- src/Storages/MergeTree/MergeTreeDataWriter.cpp | 2 +- src/Storages/MergeTree/MergeTreeIndexBloomFilter.cpp | 2 +- .../MergeTree/MergeTreeIndexGranuleBloomFilter.cpp | 2 +- src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp | 2 +- 29 files changed, 46 insertions(+), 45 deletions(-) diff --git a/contrib/sysroot b/contrib/sysroot index bbcac834526..e9fb375d0a1 160000 --- a/contrib/sysroot +++ b/contrib/sysroot @@ -1 +1 @@ -Subproject commit bbcac834526d90d1e764164b861be426891d1743 +Subproject commit e9fb375d0a1e5ebfd74c043f088f2342552103f8 diff --git a/src/Access/DiskAccessStorage.cpp b/src/Access/DiskAccessStorage.cpp index 7393fcd8d36..a9eb27c291c 100644 --- a/src/Access/DiskAccessStorage.cpp +++ b/src/Access/DiskAccessStorage.cpp @@ -327,7 +327,8 @@ void DiskAccessStorage::scheduleWriteLists(AccessEntityType type) /// Create the 'need_rebuild_lists.mark' file. /// This file will be used later to find out if writing lists is successful or not. - std::ofstream{getNeedRebuildListsMarkFilePath(directory_path)}; + std::ofstream out{getNeedRebuildListsMarkFilePath(directory_path)}; + out.close(); lists_writing_thread = ThreadFromGlobalPool{&DiskAccessStorage::listsWritingThreadFunc, this}; lists_writing_thread_is_waiting = true; diff --git a/src/AggregateFunctions/AggregateFunctionAvgWeighted.cpp b/src/AggregateFunctions/AggregateFunctionAvgWeighted.cpp index ab6fdc8fd7e..4d7901a7fac 100644 --- a/src/AggregateFunctions/AggregateFunctionAvgWeighted.cpp +++ b/src/AggregateFunctions/AggregateFunctionAvgWeighted.cpp @@ -39,7 +39,7 @@ bool allowTypes(const DataTypePtr& left, const DataTypePtr& right) noexcept } template -static IAggregateFunction * create(const IDataType & second_type, TArgs && ... args) +IAggregateFunction * create(const IDataType & second_type, TArgs && ... args) { const WhichDataType which(second_type); @@ -51,7 +51,7 @@ static IAggregateFunction * create(const IDataType & second_type, TArgs && ... a // Not using helper functions because there are no templates for binary decimal/numeric function. template -static IAggregateFunction * create(const IDataType & first_type, const IDataType & second_type, TArgs && ... 
args) +IAggregateFunction * create(const IDataType & first_type, const IDataType & second_type, TArgs && ... args) { const WhichDataType which(first_type); diff --git a/src/AggregateFunctions/AggregateFunctionDeltaSum.cpp b/src/AggregateFunctions/AggregateFunctionDeltaSum.cpp index f1c6e7c6112..3b43d9a85f8 100644 --- a/src/AggregateFunctions/AggregateFunctionDeltaSum.cpp +++ b/src/AggregateFunctions/AggregateFunctionDeltaSum.cpp @@ -30,7 +30,7 @@ AggregateFunctionPtr createAggregateFunctionDeltaSum( throw Exception("Incorrect number of arguments for aggregate function " + name, ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - DataTypePtr data_type = arguments[0]; + const DataTypePtr & data_type = arguments[0]; if (isInteger(data_type) || isFloat(data_type)) return AggregateFunctionPtr(createWithNumericType( diff --git a/src/AggregateFunctions/AggregateFunctionGroupArray.cpp b/src/AggregateFunctions/AggregateFunctionGroupArray.cpp index 5a9fd778277..85075d5a4d6 100644 --- a/src/AggregateFunctions/AggregateFunctionGroupArray.cpp +++ b/src/AggregateFunctions/AggregateFunctionGroupArray.cpp @@ -20,7 +20,7 @@ namespace { template