From a6254516e0588e43c9ec5ea7711b765458acfedf Mon Sep 17 00:00:00 2001
From: zhongyuankai <872237106@qq.com>
Date: Tue, 11 Jan 2022 09:58:53 +0800
Subject: [PATCH 01/47] Fix ALTER TTL modification on table engines that do not
 support TTL
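
Engines that cannot apply TTL used to accept ALTER queries with a TTL clause,
because validation only looked at the in-memory metadata. validate() now
receives the storage itself and rejects the command early whenever
IStorage::supportsTTL() returns false. A minimal sketch of the new behavior,
mirroring the test added below (the error text comes from the new check):

    CREATE TABLE log_02184 (id UInt64, name String, dt Date) ENGINE = Log();
    ALTER TABLE log_02184 MODIFY COLUMN name String TTL dt + INTERVAL 1 MONTH;
    -- rejected with BAD_ARGUMENTS: Engine Log doesn't support TTL clause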
---
 src/Interpreters/InterpreterAlterQuery.cpp    |  2 +-
 src/Storages/AlterCommands.cpp                |  6 +-
 src/Storages/AlterCommands.h                  |  2 +-
 src/Storages/IStorage.h                       |  3 +
 src/Storages/MergeTree/MergeTreeData.h        |  2 +
 src/Storages/StorageFactory.h                 |  1 +
 .../02184_storage_add_support_ttl.reference   |  0
 .../02184_storage_add_support_ttl.sql         | 59 +++++++++++++++++++
 8 files changed, 72 insertions(+), 3 deletions(-)
 create mode 100644 tests/queries/0_stateless/02184_storage_add_support_ttl.reference
 create mode 100644 tests/queries/0_stateless/02184_storage_add_support_ttl.sql

diff --git a/src/Interpreters/InterpreterAlterQuery.cpp b/src/Interpreters/InterpreterAlterQuery.cpp
index d01f2b05567..bd6839d72d0 100644
--- a/src/Interpreters/InterpreterAlterQuery.cpp
+++ b/src/Interpreters/InterpreterAlterQuery.cpp
@@ -170,7 +170,7 @@ BlockIO InterpreterAlterQuery::executeToTable(const ASTAlterQuery & alter)
     {
         auto alter_lock = table->lockForAlter(getContext()->getSettingsRef().lock_acquire_timeout);
         StorageInMemoryMetadata metadata = table->getInMemoryMetadata();
-        alter_commands.validate(metadata, getContext());
+        alter_commands.validate(table, getContext());
         alter_commands.prepare(metadata);
         table->checkAlterIsPossible(alter_commands, getContext());
         table->alter(alter_commands, getContext(), alter_lock);
diff --git a/src/Storages/AlterCommands.cpp b/src/Storages/AlterCommands.cpp
index 90e10abfa92..e35cf553b63 100644
--- a/src/Storages/AlterCommands.cpp
+++ b/src/Storages/AlterCommands.cpp
@@ -998,8 +998,9 @@ void AlterCommands::prepare(const StorageInMemoryMetadata & metadata)
 }
 
 
-void AlterCommands::validate(const StorageInMemoryMetadata & metadata, ContextPtr context) const
+void AlterCommands::validate(const StoragePtr & table, ContextPtr context) const
 {
+    const StorageInMemoryMetadata & metadata = table->getInMemoryMetadata();
     auto all_columns = metadata.columns;
     /// Default expression for all added/modified columns
     ASTPtr default_expr_list = std::make_shared<ASTExpressionList>();
@@ -1008,6 +1009,9 @@
     {
         const auto & command = (*this)[i];
 
+        if (command.ttl && !table->supportsTTL())
+            throw Exception("Engine " + table->getName() + " doesn't support TTL clause", ErrorCodes::BAD_ARGUMENTS);
+
         const auto & column_name = command.column_name;
         if (command.type == AlterCommand::ADD_COLUMN)
         {
diff --git a/src/Storages/AlterCommands.h b/src/Storages/AlterCommands.h
index dce6b496741..71c622cb9be 100644
--- a/src/Storages/AlterCommands.h
+++ b/src/Storages/AlterCommands.h
@@ -186,7 +186,7 @@ public:
     /// Checks that all columns exist and dependencies between them.
     /// This check is lightweight and based only on metadata.
     /// A more accurate check has to be performed with storage->checkAlterIsPossible.
-    void validate(const StorageInMemoryMetadata & metadata, ContextPtr context) const;
+    void validate(const StoragePtr & table, ContextPtr context) const;
 
     /// Prepare alter commands. Set ignore flag to some of them and set some
     /// parts to commands from storage's metadata (for example, absent default)
diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h
index 6342c3f6b47..99f57ce058a 100644
--- a/src/Storages/IStorage.h
+++ b/src/Storages/IStorage.h
@@ -132,6 +132,9 @@ public:
     /// Returns true if the storage supports insert queries with the PARTITION BY section.
     virtual bool supportsPartitionBy() const { return false; }
 
+    /// Returns true if the storage supports queries with the TTL section.
+    virtual bool supportsTTL() const { return false; }
+
     /// Returns true if the storage supports queries with the PREWHERE section.
     virtual bool supportsPrewhere() const { return false; }
 
diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h
index ebd1950a720..05d5efb0eed 100644
--- a/src/Storages/MergeTree/MergeTreeData.h
+++ b/src/Storages/MergeTree/MergeTreeData.h
@@ -414,6 +414,8 @@ public:
 
     bool supportsSubcolumns() const override { return true; }
 
+    bool supportsTTL() const override { return true; }
+
     NamesAndTypesList getVirtuals() const override;
 
     bool mayBenefitFromIndexForIn(const ASTPtr & left_in_operand, ContextPtr, const StorageMetadataPtr & metadata_snapshot) const override;
diff --git a/src/Storages/StorageFactory.h b/src/Storages/StorageFactory.h
index 6ffa6327176..77309541374 100644
--- a/src/Storages/StorageFactory.h
+++ b/src/Storages/StorageFactory.h
@@ -59,6 +59,7 @@ public:
         bool supports_skipping_indices = false;
         bool supports_projections = false;
         bool supports_sort_order = false;
+        /// See also IStorage::supportsTTL()
         bool supports_ttl = false;
         /// See also IStorage::supportsReplication()
         bool supports_replication = false;
diff --git a/tests/queries/0_stateless/02184_storage_add_support_ttl.reference b/tests/queries/0_stateless/02184_storage_add_support_ttl.reference
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/tests/queries/0_stateless/02184_storage_add_support_ttl.sql b/tests/queries/0_stateless/02184_storage_add_support_ttl.sql
new file mode 100644
index 00000000000..dca8ee3105c
--- /dev/null
+++ b/tests/queries/0_stateless/02184_storage_add_support_ttl.sql
@@ -0,0 +1,59 @@
+DROP TABLE IF EXISTS mergeTree_02184;
+CREATE TABLE mergeTree_02184 (id UInt64, name String, dt Date) Engine=MergeTree ORDER BY id;
+ALTER TABLE mergeTree_02184 MODIFY COLUMN name String TTL dt + INTERVAL 1 MONTH;
+DETACH TABLE mergeTree_02184;
+ATTACH TABLE mergeTree_02184;
+
+DROP TABLE IF EXISTS distributed_02184;
+CREATE TABLE distributed_02184 (id UInt64, name String, dt Date) Engine=Distributed('test_cluster_two_shards', 'default', 'mergeTree_02184', rand());
+ALTER TABLE distributed_02184 MODIFY COLUMN name String TTL dt + INTERVAL 1 MONTH; -- { serverError BAD_ARGUMENTS }
+DETACH TABLE distributed_02184;
+ATTACH TABLE distributed_02184;
+
+DROP TABLE IF EXISTS buffer_02184;
+CREATE TABLE buffer_02184 (id UInt64, name String, dt Date) ENGINE = Buffer(default, mergeTree_02184, 16, 10, 100, 10000, 1000000, 10000000, 100000000);
+ALTER TABLE buffer_02184 MODIFY COLUMN name String TTL dt + INTERVAL 1 MONTH; -- { serverError BAD_ARGUMENTS }
+DETACH TABLE buffer_02184;
+ATTACH TABLE buffer_02184;
+
+DROP TABLE IF EXISTS merge_02184;
+CREATE TABLE merge_02184 (id UInt64, name String, dt Date) ENGINE = Merge('default', 'distributed_02184');
+ALTER TABLE merge_02184 MODIFY COLUMN name String TTL dt + INTERVAL 1 MONTH; -- { serverError BAD_ARGUMENTS }
+DETACH TABLE merge_02184;
+ATTACH TABLE merge_02184;
+
+DROP TABLE IF EXISTS null_02184;
+CREATE TABLE null_02184 AS system.one Engine=Null();
+ALTER TABLE null_02184 MODIFY COLUMN dummy Int TTL now() + INTERVAL 1 MONTH; -- { serverError BAD_ARGUMENTS }
+DETACH TABLE null_02184;
+ATTACH TABLE null_02184;
+
+DROP TABLE IF EXISTS file_02184;
+CREATE TABLE file_02184 (id UInt64, name String, dt Date) ENGINE = File(TabSeparated);
+ALTER TABLE file_02184 MODIFY COLUMN name String TTL dt + INTERVAL 1 MONTH; -- { serverError BAD_ARGUMENTS }
+DETACH TABLE file_02184;
+ATTACH TABLE file_02184;
+
+DROP TABLE IF EXISTS memory_02184;
+CREATE TABLE memory_02184 (id UInt64, name String, dt Date) ENGINE = Memory();
+ALTER TABLE memory_02184 MODIFY COLUMN name String TTL dt + INTERVAL 1 MONTH; -- { serverError BAD_ARGUMENTS }
+DETACH TABLE memory_02184;
+ATTACH TABLE memory_02184;
+
+DROP TABLE IF EXISTS log_02184;
+CREATE TABLE log_02184 (id UInt64, name String, dt Date) ENGINE = Log();
+ALTER TABLE log_02184 MODIFY COLUMN name String TTL dt + INTERVAL 1 MONTH; -- { serverError BAD_ARGUMENTS }
+DETACH TABLE log_02184;
+ATTACH TABLE log_02184;
+
+DROP TABLE IF EXISTS tiny_log_02184;
+CREATE TABLE tiny_log_02184 (id UInt64, name String, dt Date) ENGINE = TinyLog();
+ALTER TABLE tiny_log_02184 MODIFY COLUMN name String TTL dt + INTERVAL 1 MONTH; -- { serverError BAD_ARGUMENTS }
+DETACH TABLE tiny_log_02184;
+ATTACH TABLE tiny_log_02184;
+
+DROP TABLE IF EXISTS stripe_log_02184;
+CREATE TABLE stripe_log_02184 (id UInt64, name String, dt Date) ENGINE = StripeLog;
+ALTER TABLE stripe_log_02184 MODIFY COLUMN name String TTL dt + INTERVAL 1 MONTH; -- { serverError BAD_ARGUMENTS }
+DETACH TABLE stripe_log_02184;
+ATTACH TABLE stripe_log_02184;

From 44b221fd22f9d8c0fa6f563d80e1dfafbf6726a8 Mon Sep 17 00:00:00 2001
From: Thom O'Connor
Date: Fri, 8 Apr 2022 10:53:08 -0600
Subject: [PATCH 02/47] Updated adopters: added 4 new adopters

---
 docs/en/introduction/adopters.md | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/docs/en/introduction/adopters.md b/docs/en/introduction/adopters.md
index 98eea85bbfa..e4d60a4fc9e 100644
--- a/docs/en/introduction/adopters.md
+++ b/docs/en/introduction/adopters.md
@@ -28,6 +28,7 @@ toc_title: Adopters
 | Badoo | Dating | Timeseries | — | 1.6 mln events/sec (2018) | [Slides in Russian, December 2019](https://presentations.clickhouse.com/meetup38/forecast.pdf) |
 | Beeline | Telecom | Data Platform | — | — | [Blog post, July 2021](https://habr.com/en/company/beeline/blog/567508/) |
 | Benocs | Network Telemetry and Analytics | Main Product | — | — | [Slides in English, October 2017](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup9/lpm.pdf) |
+| Better Stack | Cloud, SaaS | Log Management | — | — | [Official Website](https://betterstack.com/logtail) |
 | BIGO | Video | Computing Platform | — | — | [Blog Article, August 2020](https://www.programmersought.com/article/44544895251/) |
 | BiliBili | Video sharing | — | — | — | [Blog post, June 2021](https://chowdera.com/2021/06/20210622012241476b.html) |
 | Bloomberg | Finance, Media | Monitoring | — | — | [Job opening, September 2021](https://careers.bloomberg.com/job/detail/94913), [slides, May 2018](https://www.slideshare.net/Altinity/http-analytics-for-6m-requests-per-second-using-clickhouse-by-alexander-bocharov) |
@@ -112,7 +113,7 @@ toc_title: Adopters
 | NLMK | Steel | Monitoring | — | — | [Article in Russian, Jan 2022](https://habr.com/en/company/nlmk/blog/645943/) |
 | NOC Project | Network Monitoring | Analytics | Main Product | — | [Official Website](https://getnoc.com/features/big-data/) |
 | Noction | Network Technology | Main Product | — | — | [Official Website](https://www.noction.com/news/irp-3-11-remote-triggered-blackholing-capability)
-| ntop | Network Monitoning | Monitoring | — | — | [Official website, Jan 2022](https://www.ntop.org/ntop/historical-traffic-analysis-at-scale-using-clickhouse-with-ntopng/) |
+| ntop | Network Monitoring | Monitoring | — | — | [Official website, January 2022](https://www.ntop.org/ntop/historical-traffic-analysis-at-scale-using-clickhouse-with-ntopng/) |
 | Nuna Inc. | Health Data Analytics | — | — | — | [Talk in English, July 2020](https://youtu.be/GMiXCMFDMow?t=170) |
 | Ok.ru | Social Network | — | 72 servers | 810 TB compressed, 50bn rows/day, 1.5 TB/day | [SmartData conference, October 2021](https://assets.ctfassets.net/oxjq45e8ilak/4JPHkbJenLgZhBGGyyonFP/57472ec6987003ec4078d0941740703b/____________________ClickHouse_______________________.pdf) |
 | Omnicomm | Transportation Monitoring | — | — | — | [Facebook post, October 2021](https://www.facebook.com/OmnicommTeam/posts/2824479777774500) |
@@ -123,6 +124,7 @@ toc_title: Adopters
 | Panelbear | Analytics | Monitoring and Analytics | — | — | [Tech Stack, November 2020](https://panelbear.com/blog/tech-stack/) |
 | Percent 百分点 | Analytics | Main Product | — | — | [Slides in Chinese, June 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup24/4.%20ClickHouse万亿数据双中心的设计与实践%20.pdf) |
 | Percona | Performance analysis | Percona Monitoring and Management | — | — | [Official website, Mar 2020](https://www.percona.com/blog/2020/03/30/advanced-query-analysis-in-percona-monitoring-and-management-with-direct-clickhouse-access/) |
+| PingCAP | Analytics | Real-Time Transactional and Analytical Processing | — | — | [GitHub, TiFlash/TiDB](https://github.com/pingcap/tiflash) |
 | Plausible | Analytics | Main Product | — | — | [Blog post, June 2020](https://twitter.com/PlausibleHQ/status/1273889629087969280) |
 | PostHog | Product Analytics | Main Product | — | — | [Release Notes, October 2020](https://posthog.com/blog/the-posthog-array-1-15-0), [Blog, November 2021](https://posthog.com/blog/how-we-turned-clickhouse-into-our-eventmansion) |
 | Postmates | Delivery | — | — | — | [Talk in English, July 2020](https://youtu.be/GMiXCMFDMow?t=188) |
@@ -159,6 +161,7 @@ toc_title: Adopters
 | Suning | E-Commerce | User behaviour analytics | — | — | [Blog article](https://www.sohu.com/a/434152235_411876) |
 | Superwall | Monetization Tooling | Main product | — | — | [Word of mouth, Jan 2022](https://github.com/ClickHouse/ClickHouse/pull/33573) |
 | Swetrix | Analytics | Main Product | — | — | [Source code](https://github.com/swetrix/swetrix-api) |
+| Synpse | Application Management | Main Product | — | — | [Tweet, January 2022](https://twitter.com/KRusenas/status/1483571168363880455) |
 | Teralytics | Mobility | Analytics | — | — | [Tech blog](https://www.teralytics.net/knowledge-hub/visualizing-mobility-data-the-scalability-challenge) |
 | Tencent | Big Data | Data processing | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/5.%20ClickHouse大数据集群应用_李俊飞腾讯网媒事业部.pdf) |
 | Tencent | Messaging | Logging | — | — | [Talk in Chinese, November 2019](https://youtu.be/T-iVQRuw-QY?t=5050) |
@@ -172,6 +175,7 @@ toc_title: Adopters
 | UTMSTAT | Analytics | Main product | — | — | [Blog post, June 2020](https://vc.ru/tribuna/133956-striming-dannyh-iz-servisa-skvoznoy-analitiki-v-clickhouse) |
 |
Vercel | Traffic and Performance Analytics | — | — | — | Direct reference, October 2021 | | VKontakte | Social Network | Statistics, Logging | — | — | [Slides in Russian, August 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup17/3_vk.pdf) | +| VKontech | Distributed Systems | Migrating from MongoDB | - | - | [Blog, January 2022](https://vkontech.com/migrating-your-reporting-queries-from-a-general-purpose-db-mongodb-to-a-data-warehouse-clickhouse-performance-overview/) | | VMware | Cloud | VeloCloud, SDN | — | — | [Product documentation](https://docs.vmware.com/en/vRealize-Operations-Manager/8.3/com.vmware.vcom.metrics.doc/GUID-A9AD72E1-C948-4CA2-971B-919385AB3CA8.html) | | Walmart Labs | Internet, Retail | — | — | — | [Talk in English, July 2020](https://youtu.be/GMiXCMFDMow?t=144) | | Wargaming | Games | | — | — | [Interview](https://habr.com/en/post/496954/) | From ae88549c4f330712962fcf805d6bb8680fee6bb4 Mon Sep 17 00:00:00 2001 From: avogar Date: Wed, 13 Apr 2022 20:02:52 +0000 Subject: [PATCH 03/47] Allow insert select for files with formats without schema inference --- src/Formats/ReadSchemaUtils.cpp | 2 +- src/TableFunctions/ITableFunctionFileLike.cpp | 23 ------------------- ..._format_without_schema_inference.reference | 1 + ...t_with_format_without_schema_inference.sql | 2 ++ 4 files changed, 4 insertions(+), 24 deletions(-) create mode 100644 tests/queries/0_stateless/02269_insert_select_with_format_without_schema_inference.reference create mode 100644 tests/queries/0_stateless/02269_insert_select_with_format_without_schema_inference.sql diff --git a/src/Formats/ReadSchemaUtils.cpp b/src/Formats/ReadSchemaUtils.cpp index 3e88b51152d..ccb017a60ac 100644 --- a/src/Formats/ReadSchemaUtils.cpp +++ b/src/Formats/ReadSchemaUtils.cpp @@ -94,7 +94,7 @@ ColumnsDescription readSchemaFromFormat( } } else - throw Exception(ErrorCodes::BAD_ARGUMENTS, "{} file format doesn't support schema inference", format_name); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "{} file format doesn't support schema inference. 
You must specify the structure manually", format_name); return ColumnsDescription(names_and_types); } diff --git a/src/TableFunctions/ITableFunctionFileLike.cpp b/src/TableFunctions/ITableFunctionFileLike.cpp index 3388a7ec9f6..cf4a62a30f4 100644 --- a/src/TableFunctions/ITableFunctionFileLike.cpp +++ b/src/TableFunctions/ITableFunctionFileLike.cpp @@ -25,24 +25,6 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; } -namespace -{ - void checkIfFormatSupportsAutoStructure(const String & name, const String & format) - { - if (name == "file" && format == "Distributed") - return; - - if (FormatFactory::instance().checkIfFormatHasAnySchemaReader(format)) - return; - - throw Exception( - "Table function '" + name - + "' allows automatic structure determination only for formats that support schema inference and for Distributed format in table function " - "'file'", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - } -} - void ITableFunctionFileLike::parseArguments(const ASTPtr & ast_function, ContextPtr context) { /// Parse args @@ -68,18 +50,13 @@ void ITableFunctionFileLike::parseArguments(const ASTPtr & ast_function, Context format = FormatFactory::instance().getFormatFromFileName(filename, true); if (args.size() <= 2) - { - checkIfFormatSupportsAutoStructure(getName(), format); return; - } if (args.size() != 3 && args.size() != 4) throw Exception("Table function '" + getName() + "' requires 1, 2, 3 or 4 arguments: filename, format (default auto), structure (default auto) and compression method (default auto)", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); structure = args[2]->as().value.safeGet(); - if (structure == "auto") - checkIfFormatSupportsAutoStructure(getName(), format); if (structure.empty()) throw Exception(ErrorCodes::BAD_ARGUMENTS, diff --git a/tests/queries/0_stateless/02269_insert_select_with_format_without_schema_inference.reference b/tests/queries/0_stateless/02269_insert_select_with_format_without_schema_inference.reference new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/tests/queries/0_stateless/02269_insert_select_with_format_without_schema_inference.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/02269_insert_select_with_format_without_schema_inference.sql b/tests/queries/0_stateless/02269_insert_select_with_format_without_schema_inference.sql new file mode 100644 index 00000000000..8ea9dba2696 --- /dev/null +++ b/tests/queries/0_stateless/02269_insert_select_with_format_without_schema_inference.sql @@ -0,0 +1,2 @@ +insert into function file('02269_data', 'RowBinary') select 1; +select * from file('02269_data', 'RowBinary', 'x UInt8'); From 31f469a8749de3e0cce5529d391547e473485b9c Mon Sep 17 00:00:00 2001 From: avogar Date: Thu, 14 Apr 2022 10:18:55 +0000 Subject: [PATCH 04/47] Split test 01675_data_type_coroutine into 2 tests to prevent possible timeouts --- .../01675_data_type_coroutine.reference | 1 - .../0_stateless/01675_data_type_coroutine.sh | 12 ------------ .../01675_data_type_coroutine_2.reference | 1 + .../01675_data_type_coroutine_2.sh | 19 +++++++++++++++++++ 4 files changed, 20 insertions(+), 13 deletions(-) create mode 100644 tests/queries/0_stateless/01675_data_type_coroutine_2.reference create mode 100755 tests/queries/0_stateless/01675_data_type_coroutine_2.sh diff --git a/tests/queries/0_stateless/01675_data_type_coroutine.reference b/tests/queries/0_stateless/01675_data_type_coroutine.reference index 541dab48def..7326d960397 100644 --- a/tests/queries/0_stateless/01675_data_type_coroutine.reference +++ 
b/tests/queries/0_stateless/01675_data_type_coroutine.reference @@ -1,2 +1 @@ Ok -Ok diff --git a/tests/queries/0_stateless/01675_data_type_coroutine.sh b/tests/queries/0_stateless/01675_data_type_coroutine.sh index 9f7d5401bd2..4106d0d7f73 100755 --- a/tests/queries/0_stateless/01675_data_type_coroutine.sh +++ b/tests/queries/0_stateless/01675_data_type_coroutine.sh @@ -17,15 +17,3 @@ while [[ $counter -lt $retries ]]; do done echo 'Ok' - -counter=0 -I=0 -while [[ $counter -lt $retries ]]; do - I=$((I + 1)) - TYPE=$(perl -e "print 'Array(' x $I; print 'UInt8'; print ')' x $I") - ${CLICKHOUSE_CLIENT} --prefer_localhost_replica=0 --max_parser_depth 1000000 --query "SELECT * FROM remote('127.0.0.{1,2}', generateRandom('x $TYPE', 1, 1, 1)) LIMIT 1 FORMAT Null" 2>&1 | grep -q -F 'Maximum parse depth' && break; - ((++counter)) -done - -#echo "I = ${I}" -echo 'Ok' diff --git a/tests/queries/0_stateless/01675_data_type_coroutine_2.reference b/tests/queries/0_stateless/01675_data_type_coroutine_2.reference new file mode 100644 index 00000000000..7326d960397 --- /dev/null +++ b/tests/queries/0_stateless/01675_data_type_coroutine_2.reference @@ -0,0 +1 @@ +Ok diff --git a/tests/queries/0_stateless/01675_data_type_coroutine_2.sh b/tests/queries/0_stateless/01675_data_type_coroutine_2.sh new file mode 100755 index 00000000000..501b9d4ab12 --- /dev/null +++ b/tests/queries/0_stateless/01675_data_type_coroutine_2.sh @@ -0,0 +1,19 @@ +#!/usr/bin/env bash +# Tags: long + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + + +counter=0 retries=60 +I=0 +while [[ $counter -lt $retries ]]; do + I=$((I + 1)) + TYPE=$(perl -e "print 'Array(' x $I; print 'UInt8'; print ')' x $I") + ${CLICKHOUSE_CLIENT} --prefer_localhost_replica=0 --max_parser_depth 1000000 --query "SELECT * FROM remote('127.0.0.{1,2}', generateRandom('x $TYPE', 1, 1, 1)) LIMIT 1 FORMAT Null" 2>&1 | grep -q -F 'Maximum parse depth' && break; + ((++counter)) +done + +#echo "I = ${I}" +echo 'Ok' From 428753a4a77135ad76a2b5f76f0b9c38bb5e74b6 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 14 Apr 2022 14:39:36 +0200 Subject: [PATCH 05/47] Fix integration tests parser --- tests/integration/ci-runner.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/integration/ci-runner.py b/tests/integration/ci-runner.py index 05e56d2a910..a047c6c114c 100755 --- a/tests/integration/ci-runner.py +++ b/tests/integration/ci-runner.py @@ -461,6 +461,11 @@ class ClickhouseIntegrationTestsRunner: if test not in main_counters[state]: main_counters[state].append(test) + for state in ("SKIPPED",): + for test in current_counters[state]: + main_counters[state].append(test) + + def _get_runner_image_cmd(self, repo_path): image_cmd = "" if self._can_run_with( From c74b6c547b6638648e1df14a6dd5478d993c5333 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 14 Apr 2022 14:40:56 +0200 Subject: [PATCH 06/47] Black --- tests/integration/ci-runner.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/integration/ci-runner.py b/tests/integration/ci-runner.py index a047c6c114c..687c88b04a3 100755 --- a/tests/integration/ci-runner.py +++ b/tests/integration/ci-runner.py @@ -465,7 +465,6 @@ class ClickhouseIntegrationTestsRunner: for test in current_counters[state]: main_counters[state].append(test) - def _get_runner_image_cmd(self, repo_path): image_cmd = "" if self._can_run_with( From e22a175afb1b0415895ddb195b9a29566802bc09 Mon Sep 17 00:00:00 2001 From: zhanglistar Date: Fri, 15 Apr 2022 
14:04:30 +0800 Subject: [PATCH 07/47] Fix ReadBufferFromHDFS crash in debug mode --- src/Storages/HDFS/ReadBufferFromHDFS.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/HDFS/ReadBufferFromHDFS.cpp b/src/Storages/HDFS/ReadBufferFromHDFS.cpp index 1bafa49e55b..58666d0f7f1 100644 --- a/src/Storages/HDFS/ReadBufferFromHDFS.cpp +++ b/src/Storages/HDFS/ReadBufferFromHDFS.cpp @@ -137,7 +137,7 @@ std::optional ReadBufferFromHDFS::getTotalSize() bool ReadBufferFromHDFS::nextImpl() { impl->position() = impl->buffer().begin() + offset(); - auto result = impl->next(); + auto result = impl->nextImpl(); if (result) BufferBase::set(impl->buffer().begin(), impl->buffer().size(), impl->offset); /// use the buffer returned by `impl` From 58fc4e033ae143af6a5802d89787e25dad2c2e8e Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 15 Apr 2022 22:54:08 +0200 Subject: [PATCH 08/47] Remove trash --- programs/server/Server.cpp | 1 + src/Common/getNumberOfPhysicalCPUCores.cpp | 94 ++++++++++------------ 2 files changed, 42 insertions(+), 53 deletions(-) diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index fc9187cb622..0120564e0b1 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -1716,6 +1716,7 @@ int Server::main(const std::vector & /*args*/) return Application::EXIT_OK; } + void Server::createServers( Poco::Util::AbstractConfiguration & config, const std::vector & listen_hosts, diff --git a/src/Common/getNumberOfPhysicalCPUCores.cpp b/src/Common/getNumberOfPhysicalCPUCores.cpp index 1d26b522e64..872a2464eb6 100644 --- a/src/Common/getNumberOfPhysicalCPUCores.cpp +++ b/src/Common/getNumberOfPhysicalCPUCores.cpp @@ -12,75 +12,63 @@ #include #if defined(OS_LINUX) -unsigned getCGroupLimitedCPUCores(unsigned default_cpu_count) +static int readFrom(const char * filename, int default_value) { - // Try to look at cgroups limit if it is available. - auto read_from = [](const char * filename, int default_value) -> int { - std::ifstream infile(filename); - if (!infile.is_open()) - { - return default_value; - } - int idata; - if (infile >> idata) - return idata; - else - return default_value; - }; + std::ifstream infile(filename); + if (!infile.is_open()) + return default_value; + int idata; + if (infile >> idata) + return idata; + else + return default_value; +} +/// Try to look at cgroups limit if it is available. +static unsigned getCGroupLimitedCPUCores(unsigned default_cpu_count) +{ unsigned quota_count = default_cpu_count; - // Return the number of milliseconds per period process is guaranteed to run. - // -1 for no quota - int cgroup_quota = read_from("/sys/fs/cgroup/cpu/cpu.cfs_quota_us", -1); - int cgroup_period = read_from("/sys/fs/cgroup/cpu/cpu.cfs_period_us", -1); + /// Return the number of milliseconds per period process is guaranteed to run. 
+ /// -1 for no quota + int cgroup_quota = readFrom("/sys/fs/cgroup/cpu/cpu.cfs_quota_us", -1); + int cgroup_period = readFrom("/sys/fs/cgroup/cpu/cpu.cfs_period_us", -1); if (cgroup_quota > -1 && cgroup_period > 0) - { quota_count = ceil(static_cast(cgroup_quota) / static_cast(cgroup_period)); - } return std::min(default_cpu_count, quota_count); } -#endif // OS_LINUX +#endif unsigned getNumberOfPhysicalCPUCores() { - static const unsigned number = [] { - unsigned cpu_count = 0; // start with an invalid num + unsigned cpu_count = 0; // start with an invalid num + #if USE_CPUID - do - { - cpu_raw_data_t raw_data; - cpu_id_t data; + cpu_raw_data_t raw_data; + cpu_id_t data; - /// On Xen VMs, libcpuid returns wrong info (zero number of cores). Fallback to alternative method. - /// Also, libcpuid does not support some CPUs like AMD Hygon C86 7151. - if (0 != cpuid_get_raw_data(&raw_data) || 0 != cpu_identify(&raw_data, &data) || data.num_logical_cpus == 0) - { - // Just fallback - break; - } + /// On Xen VMs, libcpuid returns wrong info (zero number of cores). Fallback to alternative method. + /// Also, libcpuid does not support some CPUs like AMD Hygon C86 7151. + /// Also, libcpuid gives strange result on Google Compute Engine VMs. + /// Example: + /// num_cores = 12, /// number of physical cores on current CPU socket + /// total_logical_cpus = 1, /// total number of logical cores on all sockets + /// num_logical_cpus = 24. /// number of logical cores on current CPU socket + /// It means two-way hyper-threading (24 / 12), but contradictory, 'total_logical_cpus' == 1. - cpu_count = data.num_cores * data.total_logical_cpus / data.num_logical_cpus; - - /// Also, libcpuid gives strange result on Google Compute Engine VMs. - /// Example: - /// num_cores = 12, /// number of physical cores on current CPU socket - /// total_logical_cpus = 1, /// total number of logical cores on all sockets - /// num_logical_cpus = 24. /// number of logical cores on current CPU socket - /// It means two-way hyper-threading (24 / 12), but contradictory, 'total_logical_cpus' == 1. - } while (false); + if (0 == cpuid_get_raw_data(&raw_data) && 0 == cpu_identify(&raw_data, &data) && data.num_logical_cpus != 0) + cpu_count = data.num_cores * data.total_logical_cpus / data.num_logical_cpus; #endif - /// As a fallback (also for non-x86 architectures) assume there are no hyper-threading on the system. - /// (Actually, only Aarch64 is supported). - if (cpu_count == 0) - cpu_count = std::thread::hardware_concurrency(); + /// As a fallback (also for non-x86 architectures) assume there are no hyper-threading on the system. + /// (Actually, only Aarch64 is supported). 
+ if (cpu_count == 0) + cpu_count = std::thread::hardware_concurrency(); #if defined(OS_LINUX) - /// TODO: add a setting for disabling that, similar to UseContainerSupport in java - cpu_count = getCGroupLimitedCPUCores(cpu_count); -#endif // OS_LINUX - return cpu_count; - }(); - return number; + /// TODO: add a setting for disabling that, similar to UseContainerSupport in java + cpu_count = getCGroupLimitedCPUCores(cpu_count); +#endif + + return cpu_count; } From 364f10aff5f0b11a45aeb24eb933f13a87fdef5d Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 15 Apr 2022 22:56:51 +0200 Subject: [PATCH 09/47] Remove trash --- src/Common/getNumberOfPhysicalCPUCores.cpp | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/Common/getNumberOfPhysicalCPUCores.cpp b/src/Common/getNumberOfPhysicalCPUCores.cpp index 872a2464eb6..f415ee58e85 100644 --- a/src/Common/getNumberOfPhysicalCPUCores.cpp +++ b/src/Common/getNumberOfPhysicalCPUCores.cpp @@ -39,7 +39,7 @@ static unsigned getCGroupLimitedCPUCores(unsigned default_cpu_count) } #endif -unsigned getNumberOfPhysicalCPUCores() +static unsigned getNumberOfPhysicalCPUCoresImpl() { unsigned cpu_count = 0; // start with an invalid num @@ -72,3 +72,10 @@ unsigned getNumberOfPhysicalCPUCores() return cpu_count; } + +unsigned getNumberOfPhysicalCPUCores() +{ + /// Calculate once. + static auto res = getNumberOfPhysicalCPUCoresImpl(); + return res; +} From 416fa95b8744825ec1c00c072b20bd404145198a Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 15 Apr 2022 23:57:09 +0200 Subject: [PATCH 10/47] Remove "Arcadia" build system --- src/Disks/AzureBlobStorage/AzureBlobStorageAuth.h | 2 -- src/Disks/AzureBlobStorage/registerDiskAzureBlobStorage.cpp | 2 -- src/IO/ReadBufferFromAzureBlobStorage.cpp | 2 -- src/IO/ReadBufferFromAzureBlobStorage.h | 2 -- src/IO/WriteBufferFromAzureBlobStorage.cpp | 2 -- src/IO/WriteBufferFromAzureBlobStorage.h | 2 -- src/Server/CertificateReloader.h | 4 +--- 7 files changed, 1 insertion(+), 15 deletions(-) diff --git a/src/Disks/AzureBlobStorage/AzureBlobStorageAuth.h b/src/Disks/AzureBlobStorage/AzureBlobStorageAuth.h index 1cef6105d41..048daa7c9dc 100644 --- a/src/Disks/AzureBlobStorage/AzureBlobStorageAuth.h +++ b/src/Disks/AzureBlobStorage/AzureBlobStorageAuth.h @@ -1,8 +1,6 @@ #pragma once -#if !defined(ARCADIA_BUILD) #include -#endif #if USE_AZURE_BLOB_STORAGE diff --git a/src/Disks/AzureBlobStorage/registerDiskAzureBlobStorage.cpp b/src/Disks/AzureBlobStorage/registerDiskAzureBlobStorage.cpp index 243452353d3..128c7534b3c 100644 --- a/src/Disks/AzureBlobStorage/registerDiskAzureBlobStorage.cpp +++ b/src/Disks/AzureBlobStorage/registerDiskAzureBlobStorage.cpp @@ -1,6 +1,4 @@ -#if !defined(ARCADIA_BUILD) #include -#endif #include diff --git a/src/IO/ReadBufferFromAzureBlobStorage.cpp b/src/IO/ReadBufferFromAzureBlobStorage.cpp index 0ce6db97437..593bd01023a 100644 --- a/src/IO/ReadBufferFromAzureBlobStorage.cpp +++ b/src/IO/ReadBufferFromAzureBlobStorage.cpp @@ -1,6 +1,4 @@ -#if !defined(ARCADIA_BUILD) #include -#endif #if USE_AZURE_BLOB_STORAGE diff --git a/src/IO/ReadBufferFromAzureBlobStorage.h b/src/IO/ReadBufferFromAzureBlobStorage.h index 78d973747ba..d743e725bda 100644 --- a/src/IO/ReadBufferFromAzureBlobStorage.h +++ b/src/IO/ReadBufferFromAzureBlobStorage.h @@ -1,8 +1,6 @@ #pragma once -#if !defined(ARCADIA_BUILD) #include -#endif #if USE_AZURE_BLOB_STORAGE diff --git a/src/IO/WriteBufferFromAzureBlobStorage.cpp b/src/IO/WriteBufferFromAzureBlobStorage.cpp index 
88882fcef65..0f8cfdf347d 100644 --- a/src/IO/WriteBufferFromAzureBlobStorage.cpp +++ b/src/IO/WriteBufferFromAzureBlobStorage.cpp @@ -1,6 +1,4 @@ -#if !defined(ARCADIA_BUILD) #include -#endif #if USE_AZURE_BLOB_STORAGE diff --git a/src/IO/WriteBufferFromAzureBlobStorage.h b/src/IO/WriteBufferFromAzureBlobStorage.h index cbbfb577a91..75336c497eb 100644 --- a/src/IO/WriteBufferFromAzureBlobStorage.h +++ b/src/IO/WriteBufferFromAzureBlobStorage.h @@ -1,8 +1,6 @@ #pragma once -#if !defined(ARCADIA_BUILD) #include -#endif #if USE_AZURE_BLOB_STORAGE diff --git a/src/Server/CertificateReloader.h b/src/Server/CertificateReloader.h index 88c732c2db6..f984b4e4691 100644 --- a/src/Server/CertificateReloader.h +++ b/src/Server/CertificateReloader.h @@ -1,8 +1,6 @@ #pragma once -#if !defined(ARCADIA_BUILD) -# include -#endif +#include #if USE_SSL From a0fe29e2433147756614133b1805703288aa077c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 15 Apr 2022 23:59:49 +0200 Subject: [PATCH 11/47] Remove Arcadia --- src/Interpreters/ActionsVisitor.cpp | 18 ++++++++++++++++++ src/Interpreters/ActionsVisitor.h | 27 --------------------------- 2 files changed, 18 insertions(+), 27 deletions(-) diff --git a/src/Interpreters/ActionsVisitor.cpp b/src/Interpreters/ActionsVisitor.cpp index 181ac9aed7e..3bad8fba270 100644 --- a/src/Interpreters/ActionsVisitor.cpp +++ b/src/Interpreters/ActionsVisitor.cpp @@ -252,6 +252,17 @@ static Block createBlockFromAST(const ASTPtr & node, const DataTypes & types, Co return header.cloneWithColumns(std::move(columns)); } + +namespace +{ + +/** Create a block for set from expression. + * 'set_element_types' - types of what are on the left hand side of IN. + * 'right_arg' - list of values: 1, 2, 3 or list of tuples: (1, 2), (3, 4), (5, 6). + * + * We need special implementation for ASTFunction, because in case, when we interpret + * large tuple or array as function, `evaluateConstantExpression` works extremely slow. + */ Block createBlockForSet( const DataTypePtr & left_arg_type, const ASTPtr & right_arg, @@ -295,6 +306,10 @@ Block createBlockForSet( return block; } +/** Create a block for set from literal. + * 'set_element_types' - types of what are on the left hand side of IN. + * 'right_arg' - Literal - Tuple or Array. + */ Block createBlockForSet( const DataTypePtr & left_arg_type, const std::shared_ptr & right_arg, @@ -346,6 +361,9 @@ Block createBlockForSet( return createBlockFromAST(elements_ast, set_element_types, context); } +} + + SetPtr makeExplicitSet( const ASTFunction * node, const ActionsDAG & actions, bool create_ordered_set, ContextPtr context, const SizeLimits & size_limits, PreparedSets & prepared_sets) diff --git a/src/Interpreters/ActionsVisitor.h b/src/Interpreters/ActionsVisitor.h index 342cc9eef9d..d1558cb961c 100644 --- a/src/Interpreters/ActionsVisitor.h +++ b/src/Interpreters/ActionsVisitor.h @@ -27,33 +27,6 @@ SetPtr makeExplicitSet( const ASTFunction * node, const ActionsDAG & actions, bool create_ordered_set, ContextPtr context, const SizeLimits & limits, PreparedSets & prepared_sets); -/** Create a block for set from expression. - * 'set_element_types' - types of what are on the left hand side of IN. - * 'right_arg' - list of values: 1, 2, 3 or list of tuples: (1, 2), (3, 4), (5, 6). - * - * We need special implementation for ASTFunction, because in case, when we interpret - * large tuple or array as function, `evaluateConstantExpression` works extremely slow. 
- * - * Note: this and following functions are used in third-party applications in Arcadia, so - * they should be declared in header file. - * - */ -Block createBlockForSet( - const DataTypePtr & left_arg_type, - const std::shared_ptr & right_arg, - const DataTypes & set_element_types, - ContextPtr context); - -/** Create a block for set from literal. - * 'set_element_types' - types of what are on the left hand side of IN. - * 'right_arg' - Literal - Tuple or Array. - */ -Block createBlockForSet( - const DataTypePtr & left_arg_type, - const ASTPtr & right_arg, - const DataTypes & set_element_types, - ContextPtr context); - /** For ActionsVisitor * A stack of ExpressionActions corresponding to nested lambda expressions. * The new action should be added to the highest possible level. From cbeeb7ec4f5d055344973d908c716e5d166ea0b3 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 16 Apr 2022 00:20:47 +0200 Subject: [PATCH 12/47] Remove Arcadia --- src/AggregateFunctions/UniquesHashSet.h | 2 +- src/Common/Config/ConfigProcessor.h | 3 +- src/Common/HashTable/Hash.h | 2 +- src/Common/OptimizedRegularExpression.cpp | 2 +- src/Common/StringUtils/CMakeLists.txt | 2 +- src/Common/ZooKeeper/ZooKeeperCommon.h | 2 +- src/Common/ZooKeeper/examples/CMakeLists.txt | 3 - .../examples/zk_many_watches_reconnect.cpp | 66 --------- src/Common/mysqlxx/mysqlxx/mysqlxx.h | 6 +- src/Common/parseAddress.h | 4 +- src/Functions/DateTimeTransforms.h | 2 +- src/Functions/FunctionFactory.h | 2 +- src/Functions/FunctionMathUnary.h | 2 +- src/Functions/FunctionsEmbeddedDictionaries.h | 2 +- src/Functions/FunctionsExternalDictionaries.h | 2 +- src/Functions/FunctionsVisitParam.h | 2 +- src/Functions/URL/FunctionsURL.h | 2 +- src/Functions/array/arrayEnumerateRanked.h | 2 +- src/Functions/array/arrayIndex.h | 4 +- src/Functions/extractTextFromHTML.cpp | 2 +- src/Functions/timeSlots.cpp | 2 +- src/IO/ReadHelpers.cpp | 2 +- src/IO/tests/gtest_s3_uri.cpp | 34 ++--- ...OptimizeIfWithConstantConditionVisitor.cpp | 2 +- src/Parsers/ASTFunction.cpp | 2 +- src/Parsers/ExpressionElementParsers.cpp | 2 +- .../Impl/JSONAsStringRowInputFormat.cpp | 1 + .../Algorithms/tests/gtest_graphite.cpp | 8 +- .../Transforms/ExpressionTransform.h | 2 +- src/Storages/examples/CMakeLists.txt | 13 -- .../examples/remove_symlink_directory.cpp | 35 ----- .../examples/transform_part_zk_nodes.cpp | 131 ------------------ 32 files changed, 49 insertions(+), 299 deletions(-) delete mode 100644 src/Common/ZooKeeper/examples/zk_many_watches_reconnect.cpp delete mode 100644 src/Storages/examples/remove_symlink_directory.cpp delete mode 100644 src/Storages/examples/transform_part_zk_nodes.cpp diff --git a/src/AggregateFunctions/UniquesHashSet.h b/src/AggregateFunctions/UniquesHashSet.h index 6837803c67d..8648f6e2500 100644 --- a/src/AggregateFunctions/UniquesHashSet.h +++ b/src/AggregateFunctions/UniquesHashSet.h @@ -59,7 +59,7 @@ /** This hash function is not the most optimal, but UniquesHashSet states counted with it, - * stored in many places on disks (in the Yandex.Metrika), so it continues to be used. + * stored in many places on disks (in many companies), so it continues to be used. 
*/ struct UniquesHashSetDefaultHash { diff --git a/src/Common/Config/ConfigProcessor.h b/src/Common/Config/ConfigProcessor.h index 6c642690945..0e1d0facf48 100644 --- a/src/Common/Config/ConfigProcessor.h +++ b/src/Common/Config/ConfigProcessor.h @@ -55,7 +55,7 @@ public: /// 2) Determine the includes file from the config: /path2/metrika.xml /// If this path is not configured, use /etc/metrika.xml /// 3) Replace elements matching the "" pattern with - /// "contents of the yandex/bar element in metrika.xml" + /// "contents of the clickhouse/bar element in metrika.xml" /// 4) If zk_node_cache is non-NULL, replace elements matching the "" pattern with /// "contents of the /bar ZooKeeper node". /// If has_zk_includes is non-NULL and there are such elements, set has_zk_includes to true. @@ -137,4 +137,3 @@ private: }; } - diff --git a/src/Common/HashTable/Hash.h b/src/Common/HashTable/Hash.h index 5dbeeecf96b..3cf8978f418 100644 --- a/src/Common/HashTable/Hash.h +++ b/src/Common/HashTable/Hash.h @@ -13,7 +13,7 @@ * * Example: when we do aggregation by the visitor ID, the performance increase is more than 5 times. * This is because of following reasons: - * - in Yandex, visitor identifier is an integer that has timestamp with seconds resolution in lower bits; + * - in Metrica web analytics system, visitor identifier is an integer that has timestamp with seconds resolution in lower bits; * - in typical implementation of standard library, hash function for integers is trivial and just use lower bits; * - traffic is non-uniformly distributed across a day; * - we are using open-addressing linear probing hash tables that are most critical to hash function quality, diff --git a/src/Common/OptimizedRegularExpression.cpp b/src/Common/OptimizedRegularExpression.cpp index 1464923e6ab..da348adbe31 100644 --- a/src/Common/OptimizedRegularExpression.cpp +++ b/src/Common/OptimizedRegularExpression.cpp @@ -28,7 +28,7 @@ void OptimizedRegularExpressionImpl::analyze( * in which all metacharacters are escaped, * and also if there are no '|' outside the brackets, * and also avoid substrings of the form `http://` or `www` and some other - * (this is the hack for typical use case in Yandex.Metrica). + * (this is the hack for typical use case in web analytics applications). */ const char * begin = regexp.data(); const char * pos = begin; diff --git a/src/Common/StringUtils/CMakeLists.txt b/src/Common/StringUtils/CMakeLists.txt index 4eedbf4842b..57c196d335c 100644 --- a/src/Common/StringUtils/CMakeLists.txt +++ b/src/Common/StringUtils/CMakeLists.txt @@ -1,4 +1,4 @@ -# These files are located in separate library, because they are used by Yandex.Metrika code +# These files are located in separate library, because they are used by separate products # in places when no dependency on whole "dbms" library is possible. include("${ClickHouse_SOURCE_DIR}/cmake/dbms_glob_sources.cmake") diff --git a/src/Common/ZooKeeper/ZooKeeperCommon.h b/src/Common/ZooKeeper/ZooKeeperCommon.h index e51bea3f7f8..532488c08f8 100644 --- a/src/Common/ZooKeeper/ZooKeeperCommon.h +++ b/src/Common/ZooKeeper/ZooKeeperCommon.h @@ -45,7 +45,7 @@ struct ZooKeeperResponse : virtual Response using ZooKeeperResponsePtr = std::shared_ptr; -/// Exposed in header file for Yandex.Metrica code. +/// Exposed in header file for some external code. 
struct ZooKeeperRequest : virtual Request { XID xid = 0; diff --git a/src/Common/ZooKeeper/examples/CMakeLists.txt b/src/Common/ZooKeeper/examples/CMakeLists.txt index 8bec951e24f..b449b172605 100644 --- a/src/Common/ZooKeeper/examples/CMakeLists.txt +++ b/src/Common/ZooKeeper/examples/CMakeLists.txt @@ -7,8 +7,5 @@ target_link_libraries(zkutil_test_commands_new_lib PRIVATE clickhouse_common_zoo add_executable(zkutil_test_async zkutil_test_async.cpp) target_link_libraries(zkutil_test_async PRIVATE clickhouse_common_zookeeper_no_log) -add_executable (zk_many_watches_reconnect zk_many_watches_reconnect.cpp) -target_link_libraries (zk_many_watches_reconnect PRIVATE clickhouse_common_zookeeper_no_log clickhouse_common_config) - add_executable (zookeeper_impl zookeeper_impl.cpp) target_link_libraries (zookeeper_impl PRIVATE clickhouse_common_zookeeper_no_log) diff --git a/src/Common/ZooKeeper/examples/zk_many_watches_reconnect.cpp b/src/Common/ZooKeeper/examples/zk_many_watches_reconnect.cpp deleted file mode 100644 index cf819121234..00000000000 --- a/src/Common/ZooKeeper/examples/zk_many_watches_reconnect.cpp +++ /dev/null @@ -1,66 +0,0 @@ -#include -#include -#include -#include - -/// A tool for reproducing https://issues.apache.org/jira/browse/ZOOKEEPER-706 -/// Original libzookeeper can't reconnect the session if the length of SET_WATCHES message -/// exceeds jute.maxbuffer (0xfffff by default). -/// This happens when the number of watches exceeds ~29000. -/// -/// Session reconnect can be caused by forbidding packets to the current zookeeper server, e.g. -/// sudo ip6tables -A OUTPUT -d mtzoo01it.haze.yandex.net -j REJECT - -const size_t N_THREADS = 100; - -int main(int argc, char ** argv) -{ - try - { - if (argc != 3) - { - std::cerr << "usage: " << argv[0] << " " << std::endl; - return 3; - } - - DB::ConfigProcessor processor(argv[1], false, true); - auto config = processor.loadConfig().configuration; - zkutil::ZooKeeper zk(*config, "zookeeper", nullptr); - zkutil::EventPtr watch = std::make_shared(); - - /// NOTE: setting watches in multiple threads because doing it in a single thread is too slow. - size_t watches_per_thread = std::stoull(argv[2]) / N_THREADS; - std::vector threads; - for (size_t i_thread = 0; i_thread < N_THREADS; ++i_thread) - { - threads.emplace_back([&, i_thread] - { - for (size_t i = 0; i < watches_per_thread; ++i) - zk.exists("/clickhouse/nonexistent_node" + std::to_string(i * N_THREADS + i_thread), nullptr, watch); - }); - } - for (size_t i_thread = 0; i_thread < N_THREADS; ++i_thread) - threads[i_thread].join(); - - while (true) - { - std::cerr << "WAITING..." << std::endl; - sleep(10); - } - } - catch (Poco::Exception & e) - { - std::cerr << "Exception: " << e.displayText() << std::endl; - return 1; - } - catch (std::exception & e) - { - std::cerr << "std::exception: " << e.what() << std::endl; - return 3; - } - catch (...) - { - std::cerr << "Some exception" << std::endl; - return 2; - } -} diff --git a/src/Common/mysqlxx/mysqlxx/mysqlxx.h b/src/Common/mysqlxx/mysqlxx/mysqlxx.h index 785d4361fd7..0b6cf5e7c1a 100644 --- a/src/Common/mysqlxx/mysqlxx/mysqlxx.h +++ b/src/Common/mysqlxx/mysqlxx/mysqlxx.h @@ -10,10 +10,10 @@ /** 'mysqlxx' - very simple library for replacement of 'mysql++' library. * - * For whatever reason, in Yandex.Metrica, back in 2008, 'mysql++' library was used. + * For whatever reason, in Metrica web analytics system, back in 2008, 'mysql++' library was used. * There are the following shortcomings of 'mysql++': * 1. 
Too rich functionality: most of it is not used. - * 2. Low performance (when used for Yandex.Metrica). + * 2. Low performance (when used for Metrica). * * Low performance is caused by the following reasons: * @@ -50,7 +50,7 @@ * And for the sake of simplicity, some functions work only with certain assumptions, * or with slightly different semantic than in mysql++. * And we don't care about cross-platform usage of mysqlxx. - * These assumptions are specific for Yandex.Metrica. Your mileage may vary. + * These assumptions are specific for Metrica. Your mileage may vary. * * mysqlxx could not be considered as separate full-featured library, * because it is developed from the principle - "everything that we don't need is not implemented". diff --git a/src/Common/parseAddress.h b/src/Common/parseAddress.h index 86340982c67..297934a6379 100644 --- a/src/Common/parseAddress.h +++ b/src/Common/parseAddress.h @@ -13,8 +13,8 @@ namespace DB * Otherwise, an exception is thrown. * * Examples: - * yandex.ru - returns "yandex.ru" and default_port - * yandex.ru:80 - returns "yandex.ru" and 80 + * clickhouse.com - returns "clickhouse.com" and default_port + * clickhouse.com:80 - returns "clickhouse.com" and 80 * [2a02:6b8:a::a]:80 - returns [2a02:6b8:a::a] and 80; note that square brackets remain in returned host. */ std::pair parseAddress(const std::string & str, UInt16 default_port); diff --git a/src/Functions/DateTimeTransforms.h b/src/Functions/DateTimeTransforms.h index 3a7baf45adc..5d1bcaf48cf 100644 --- a/src/Functions/DateTimeTransforms.h +++ b/src/Functions/DateTimeTransforms.h @@ -539,7 +539,7 @@ struct ToStartOfFifteenMinutesImpl using FactorTransform = ZeroTransform; }; -/// Round to start of half-an-hour length interval with unspecified offset. This transform is specific for Yandex.Metrica. +/// Round to start of half-an-hour length interval with unspecified offset. This transform is specific for Metrica web analytics system. struct TimeSlotImpl { static constexpr auto name = "timeSlot"; diff --git a/src/Functions/FunctionFactory.h b/src/Functions/FunctionFactory.h index 898a463cd58..13b14559ec4 100644 --- a/src/Functions/FunctionFactory.h +++ b/src/Functions/FunctionFactory.h @@ -40,7 +40,7 @@ public: registerFunction(name, &Function::create, case_sensitiveness); } - /// This function is used by YQL - internal Yandex product that depends on ClickHouse by source code. + /// This function is used by YQL - innovative transactional DBMS that depends on ClickHouse by source code. std::vector getAllNames() const; bool has(const std::string & name) const; diff --git a/src/Functions/FunctionMathUnary.h b/src/Functions/FunctionMathUnary.h index fa10c004e87..bd656db792b 100644 --- a/src/Functions/FunctionMathUnary.h +++ b/src/Functions/FunctionMathUnary.h @@ -10,7 +10,7 @@ #include "config_functions.h" -/** FastOps is a fast vector math library from Mikhail Parakhin (former Yandex CTO), +/** FastOps is a fast vector math library from Mikhail Parakhin, https://www.linkedin.com/in/mikhail-parakhin/ * Enabled by default. 
*/ #if USE_FASTOPS diff --git a/src/Functions/FunctionsEmbeddedDictionaries.h b/src/Functions/FunctionsEmbeddedDictionaries.h index c6ea886b4a8..20be3ee3cce 100644 --- a/src/Functions/FunctionsEmbeddedDictionaries.h +++ b/src/Functions/FunctionsEmbeddedDictionaries.h @@ -33,7 +33,7 @@ namespace ErrorCodes extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; } -/** Functions using Yandex.Metrica dictionaries +/** Functions using deprecated dictionaries * - dictionaries of regions, operating systems, search engines. * * Climb up the tree to a certain level. diff --git a/src/Functions/FunctionsExternalDictionaries.h b/src/Functions/FunctionsExternalDictionaries.h index 6a701d7b864..189ec7321c1 100644 --- a/src/Functions/FunctionsExternalDictionaries.h +++ b/src/Functions/FunctionsExternalDictionaries.h @@ -195,7 +195,7 @@ public: * that is the initiator of a distributed query, * in the case when the function will be invoked for real data only at the remote servers. * This feature is controversial and implemented specially - * for backward compatibility with the case in Yandex Banner System. + * for backward compatibility with the case in the Banner System application. */ if (input_rows_count == 0) return result_type->createColumn(); diff --git a/src/Functions/FunctionsVisitParam.h b/src/Functions/FunctionsVisitParam.h index 362c3bcd693..09fcf8659ed 100644 --- a/src/Functions/FunctionsVisitParam.h +++ b/src/Functions/FunctionsVisitParam.h @@ -12,7 +12,7 @@ /** Functions for retrieving "visit parameters". - * Visit parameters in Yandex.Metrika are a special kind of JSONs. + * Visit parameters in Metrica web analytics system are a special kind of JSONs. * These functions are applicable to almost any JSONs. * Implemented via templates from FunctionsStringSearch.h. * diff --git a/src/Functions/URL/FunctionsURL.h b/src/Functions/URL/FunctionsURL.h index 297b62ca256..a0f106742fb 100644 --- a/src/Functions/URL/FunctionsURL.h +++ b/src/Functions/URL/FunctionsURL.h @@ -45,7 +45,7 @@ namespace DB * Remove specified parameter from URL. * cutURLParameter(URL, name) * - * Get array of URL 'hierarchy' as in Yandex.Metrica tree-like reports. See docs. + * Get array of URL 'hierarchy' as in web-analytics tree-like reports. See the docs. * URLHierarchy(URL) */ diff --git a/src/Functions/array/arrayEnumerateRanked.h b/src/Functions/array/arrayEnumerateRanked.h index d6a62a966ae..ac3936af0fd 100644 --- a/src/Functions/array/arrayEnumerateRanked.h +++ b/src/Functions/array/arrayEnumerateRanked.h @@ -15,7 +15,7 @@ // for better debug: #include /** The function will enumerate distinct values of the passed multidimensional arrays looking inside at the specified depths. - * This is very unusual function made as a special order for Yandex.Metrica. + * This is very unusual function made as a special order for our dear customer - Metrica web analytics system. * * arrayEnumerateUniqRanked(['hello', 'world', 'hello']) = [1, 1, 2] * - it returns similar structured array containing number of occurrence of the corresponding value. diff --git a/src/Functions/array/arrayIndex.h b/src/Functions/array/arrayIndex.h index 8b42b99cd69..0dbbe5e41b6 100644 --- a/src/Functions/array/arrayIndex.h +++ b/src/Functions/array/arrayIndex.h @@ -718,9 +718,7 @@ private: /** * Catches arguments of type LowCardinality(T) (left) and U (right). 
* - * The perftests - * https://clickhouse-test-reports.s3.yandex.net/12550/2d27fa0fa8c198a82bf1fe3625050ccf56695976/integration_tests_(release).html - * showed that the amount of action needed to convert the non-constant right argument to the index column + * The perftests showed that the amount of action needed to convert the non-constant right argument to the index column * (similar to the left one's) is significantly higher than converting the array itself to an ordinary column. * * So, in terms of performance it's more optimal to fall back to default implementation and catch only constant diff --git a/src/Functions/extractTextFromHTML.cpp b/src/Functions/extractTextFromHTML.cpp index b38ea74d6ce..f321a59f734 100644 --- a/src/Functions/extractTextFromHTML.cpp +++ b/src/Functions/extractTextFromHTML.cpp @@ -53,7 +53,7 @@ * * Usage example: * - * SELECT extractTextFromHTML(html) FROM url('https://yandex.ru/', RawBLOB, 'html String') + * SELECT extractTextFromHTML(html) FROM url('https://github.com/ClickHouse/ClickHouse', RawBLOB, 'html String') * * - ClickHouse has embedded web browser. */ diff --git a/src/Functions/timeSlots.cpp b/src/Functions/timeSlots.cpp index 34af410befa..a19ccf62565 100644 --- a/src/Functions/timeSlots.cpp +++ b/src/Functions/timeSlots.cpp @@ -29,7 +29,7 @@ namespace * For example, timeSlots(toDateTime('2012-01-01 12:20:00'), 600) = [toDateTime('2012-01-01 12:00:00'), toDateTime('2012-01-01 12:30:00')]. * This is necessary to search for hits that are part of the corresponding visit. * - * This is obsolete function. It was developed for Yandex.Metrica, but no longer used in Yandex. + * This is obsolete function. It was developed for Metrica web analytics system, but the art of its usage has been forgotten. * But this function was adopted by wider audience. 
*/ diff --git a/src/IO/ReadHelpers.cpp b/src/IO/ReadHelpers.cpp index bf3cccccab8..8aee5713cdc 100644 --- a/src/IO/ReadHelpers.cpp +++ b/src/IO/ReadHelpers.cpp @@ -343,7 +343,7 @@ static void parseComplexEscapeSequence(Vector & s, ReadBuffer & buf) && decoded_char != '"' && decoded_char != '`' /// MySQL style identifiers && decoded_char != '/' /// JavaScript in HTML - && decoded_char != '=' /// Yandex's TSKV + && decoded_char != '=' /// TSKV format invented somewhere && !isControlASCII(decoded_char)) { s.push_back('\\'); diff --git a/src/IO/tests/gtest_s3_uri.cpp b/src/IO/tests/gtest_s3_uri.cpp index 7ee72069e57..20d19437c64 100644 --- a/src/IO/tests/gtest_s3_uri.cpp +++ b/src/IO/tests/gtest_s3_uri.cpp @@ -16,36 +16,36 @@ class S3UriTest : public testing::TestWithParam TEST(S3UriTest, validPatterns) { { - S3::URI uri(Poco::URI("https://jokserfn.s3.yandexcloud.net/")); - ASSERT_EQ("https://s3.yandexcloud.net", uri.endpoint); + S3::URI uri(Poco::URI("https://jokserfn.s3.amazonaws.com/")); + ASSERT_EQ("https://s3.amazonaws.com", uri.endpoint); ASSERT_EQ("jokserfn", uri.bucket); ASSERT_EQ("", uri.key); ASSERT_EQ(true, uri.is_virtual_hosted_style); } { - S3::URI uri(Poco::URI("https://s3.yandexcloud.net/jokserfn/")); - ASSERT_EQ("https://s3.yandexcloud.net", uri.endpoint); + S3::URI uri(Poco::URI("https://s3.amazonaws.com/jokserfn/")); + ASSERT_EQ("https://s3.amazonaws.com", uri.endpoint); ASSERT_EQ("jokserfn", uri.bucket); ASSERT_EQ("", uri.key); ASSERT_EQ(false, uri.is_virtual_hosted_style); } { - S3::URI uri(Poco::URI("https://yandexcloud.net/bucket/")); - ASSERT_EQ("https://yandexcloud.net", uri.endpoint); + S3::URI uri(Poco::URI("https://amazonaws.com/bucket/")); + ASSERT_EQ("https://amazonaws.com", uri.endpoint); ASSERT_EQ("bucket", uri.bucket); ASSERT_EQ("", uri.key); ASSERT_EQ(false, uri.is_virtual_hosted_style); } { - S3::URI uri(Poco::URI("https://jokserfn.s3.yandexcloud.net/data")); - ASSERT_EQ("https://s3.yandexcloud.net", uri.endpoint); + S3::URI uri(Poco::URI("https://jokserfn.s3.amazonaws.com/data")); + ASSERT_EQ("https://s3.amazonaws.com", uri.endpoint); ASSERT_EQ("jokserfn", uri.bucket); ASSERT_EQ("data", uri.key); ASSERT_EQ(true, uri.is_virtual_hosted_style); } { - S3::URI uri(Poco::URI("https://storage.yandexcloud.net/jokserfn/data")); - ASSERT_EQ("https://storage.yandexcloud.net", uri.endpoint); + S3::URI uri(Poco::URI("https://storage.amazonaws.com/jokserfn/data")); + ASSERT_EQ("https://storage.amazonaws.com", uri.endpoint); ASSERT_EQ("jokserfn", uri.bucket); ASSERT_EQ("data", uri.key); ASSERT_EQ(false, uri.is_virtual_hosted_style); @@ -97,13 +97,13 @@ INSTANTIATE_TEST_SUITE_P( S3UriTest, testing::Values( "https:///", - "https://.s3.yandexcloud.net/key", - "https://s3.yandexcloud.net/key", - "https://jokserfn.s3yandexcloud.net/key", - "https://s3.yandexcloud.net//", - "https://yandexcloud.net/", - "https://yandexcloud.net//", - "https://yandexcloud.net//key")); + "https://.s3.amazonaws.com/key", + "https://s3.amazonaws.com/key", + "https://jokserfn.s3amazonaws.com/key", + "https://s3.amazonaws.com//", + "https://amazonaws.com/", + "https://amazonaws.com//", + "https://amazonaws.com//key")); } diff --git a/src/Interpreters/OptimizeIfWithConstantConditionVisitor.cpp b/src/Interpreters/OptimizeIfWithConstantConditionVisitor.cpp index 802bf4e43ce..28b19f6670d 100644 --- a/src/Interpreters/OptimizeIfWithConstantConditionVisitor.cpp +++ b/src/Interpreters/OptimizeIfWithConstantConditionVisitor.cpp @@ -28,7 +28,7 @@ static bool tryExtractConstValueFromCondition(const ASTPtr & 
condition, bool & v } /// cast of numeric constant in condition to UInt8 - /// Note: this solution is ad-hoc and only implemented for yandex.metrica use case. + /// Note: this solution is ad-hoc and only implemented for the Metrica use case (one of the best customers). /// We should allow any constant condition (or maybe remove this optimization completely) later. if (const auto * function = condition->as<ASTFunction>()) { diff --git a/src/Parsers/ASTFunction.cpp b/src/Parsers/ASTFunction.cpp index f9a5c7be75f..b86929b054c 100644 --- a/src/Parsers/ASTFunction.cpp +++ b/src/Parsers/ASTFunction.cpp @@ -166,7 +166,7 @@ ASTPtr ASTFunction::toLiteral() const * Motivation: most people are unaware that _ is a metacharacter and forget to properly escape it with two backslashes. * With highlighting we make it clearly obvious. * - * Another case is regexp match. Suppose the user types match(URL, 'www.yandex.ru'). It often means that the user is unaware that . is a metacharacter. + * Another case is regexp match. Suppose the user types match(URL, 'www.clickhouse.com'). It often means that the user is unaware that . is a metacharacter. */ static bool highlightStringLiteralWithMetacharacters(const ASTPtr & node, const IAST::FormatSettings & settings, const char * metacharacters) { diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp index cd07e304a39..29c7846283e 100644 --- a/src/Parsers/ExpressionElementParsers.cpp +++ b/src/Parsers/ExpressionElementParsers.cpp @@ -1727,7 +1727,7 @@ const char * ParserAlias::restricted_keywords[] = "NOT", "OFFSET", "ON", - "ONLY", /// YQL synonym for ANTI. Note: YQL is the name of one of Yandex proprietary languages, completely unrelated to ClickHouse. + "ONLY", /// YQL's synonym for ANTI. Note: YQL is the name of a proprietary language, completely unrelated to ClickHouse. "ORDER", "PREWHERE", "RIGHT", diff --git a/src/Processors/Formats/Impl/JSONAsStringRowInputFormat.cpp b/src/Processors/Formats/Impl/JSONAsStringRowInputFormat.cpp index 9bf1682b77e..e31006ff0f6 100644 --- a/src/Processors/Formats/Impl/JSONAsStringRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONAsStringRowInputFormat.cpp @@ -52,6 +52,7 @@ void JSONAsRowInputFormat::readSuffix() { assertChar(']', *buf); skipWhitespaceIfAny(*buf); + data_in_square_brackets = false; } if (!buf->eof() && *buf->position() == ';') { diff --git a/src/Processors/Merges/Algorithms/tests/gtest_graphite.cpp b/src/Processors/Merges/Algorithms/tests/gtest_graphite.cpp index 1d739bf566a..9160ad6e0fa 100644 --- a/src/Processors/Merges/Algorithms/tests/gtest_graphite.cpp +++ b/src/Processors/Merges/Algorithms/tests/gtest_graphite.cpp @@ -149,7 +149,7 @@ TEST(GraphiteTest, testSelectPattern) using namespace std::literals; std::string - xml(R"END(<yandex> + xml(R"END(<clickhouse> \.sum$ @@ -210,7 +210,7 @@ TEST(GraphiteTest, testSelectPattern) - </yandex> + </clickhouse> )END"); // Retentions must be ordered by 'age' descending. @@ -370,7 +370,7 @@ TEST(GraphiteTest, testSelectPatternTyped) using namespace std::literals; std::string - xml(R"END(<yandex> + xml(R"END(<clickhouse> plain @@ -488,7 +488,7 @@ TEST(GraphiteTest, testSelectPatternTyped) - </yandex> + </clickhouse> )END"); // Retentions must be ordered by 'age' descending. diff --git a/src/Processors/Transforms/ExpressionTransform.h b/src/Processors/Transforms/ExpressionTransform.h index ea73c8fb1da..791c7d7ba73 100644 --- a/src/Processors/Transforms/ExpressionTransform.h +++ b/src/Processors/Transforms/ExpressionTransform.h @@ -12,7 +12,7 @@ class ActionsDAG; /** Executes a certain expression over the block.
* The expression consists of column identifiers from the block, constants, common functions. - * For example: hits * 2 + 3, url LIKE '%yandex%' + * For example: hits * 2 + 3, url LIKE '%clickhouse%' * The expression processes each row independently of the others. */ class ExpressionTransform final : public ISimpleTransform diff --git a/src/Storages/examples/CMakeLists.txt b/src/Storages/examples/CMakeLists.txt index 103972a106f..ca85ca9d98f 100644 --- a/src/Storages/examples/CMakeLists.txt +++ b/src/Storages/examples/CMakeLists.txt @@ -1,6 +1,3 @@ -add_executable (remove_symlink_directory remove_symlink_directory.cpp) -target_link_libraries (remove_symlink_directory PRIVATE dbms) - add_executable (merge_selector merge_selector.cpp) target_link_libraries (merge_selector PRIVATE dbms) @@ -12,13 +9,3 @@ target_link_libraries (get_current_inserts_in_replicated PRIVATE dbms clickhouse add_executable (get_abandonable_lock_in_all_partitions get_abandonable_lock_in_all_partitions.cpp) target_link_libraries (get_abandonable_lock_in_all_partitions PRIVATE dbms clickhouse_common_config clickhouse_common_zookeeper) - -add_executable (transform_part_zk_nodes transform_part_zk_nodes.cpp) -target_link_libraries (transform_part_zk_nodes - PRIVATE - boost::program_options - clickhouse_common_config - clickhouse_common_zookeeper - dbms - string_utils -) diff --git a/src/Storages/examples/remove_symlink_directory.cpp b/src/Storages/examples/remove_symlink_directory.cpp deleted file mode 100644 index db436c0a608..00000000000 --- a/src/Storages/examples/remove_symlink_directory.cpp +++ /dev/null @@ -1,35 +0,0 @@ -#include -#include -#include -#include -#include - -namespace fs = std::filesystem; - -namespace DB -{ - namespace ErrorCodes - { - extern const int SYSTEM_ERROR; - } -} - -int main(int, char **) -try -{ - fs::path dir("./test_dir/"); - fs::create_directories(dir); - FS::createFile("./test_dir/file"); - - if (0 != symlink("./test_dir", "./test_link")) - DB::throwFromErrnoWithPath("Cannot create symlink", "./test_link", DB::ErrorCodes::SYSTEM_ERROR); - - fs::rename("./test_link", "./test_link2"); - fs::remove_all("./test_link2"); - return 0; -} -catch (...) -{ - std::cerr << DB::getCurrentExceptionMessage(false) << "\n"; - return 1; -} diff --git a/src/Storages/examples/transform_part_zk_nodes.cpp b/src/Storages/examples/transform_part_zk_nodes.cpp deleted file mode 100644 index 3cbcc76190e..00000000000 --- a/src/Storages/examples/transform_part_zk_nodes.cpp +++ /dev/null @@ -1,131 +0,0 @@ -#include -#include -#include -#include -#include - -#include - -#include -#include - - -int main(int argc, char ** argv) -try -{ - boost::program_options::options_description desc("Allowed options"); - desc.add_options() - ("help,h", "produce help message") - ("address,a", boost::program_options::value()->required(), - "addresses of ZooKeeper instances, comma separated. Example: example01e.yandex.ru:2181") - ("path,p", boost::program_options::value()->required(), - "where to start") - ; - - boost::program_options::variables_map options; - boost::program_options::store(boost::program_options::parse_command_line(argc, argv, desc), options); - - if (options.count("help")) - { - std::cout << "Transform contents of part nodes in ZooKeeper to more compact storage scheme." 
<< std::endl; - std::cout << "Usage: " << argv[0] << " [options]" << std::endl; - std::cout << desc << std::endl; - return 1; - } - - zkutil::ZooKeeper zookeeper(options.at("address").as()); - - std::string initial_path = options.at("path").as(); - - struct Node - { - Node( - std::string path_, - std::future get_future_, - std::future children_future_, - Node * parent_) - : path(std::move(path_)) - , get_future(std::move(get_future_)) - , children_future(std::move(children_future_)) - , parent(parent_) - { - } - - std::string path; - std::future get_future; - std::future children_future; - - Node * parent = nullptr; - std::future set_future; - }; - - std::list nodes_queue; - nodes_queue.emplace_back( - initial_path, zookeeper.asyncGet(initial_path), zookeeper.asyncGetChildren(initial_path), nullptr); - - for (auto it = nodes_queue.begin(); it != nodes_queue.end(); ++it) - { - Coordination::GetResponse get_response; - Coordination::ListResponse children_response; - try - { - get_response = it->get_future.get(); - children_response = it->children_future.get(); - } - catch (const Coordination::Exception & e) - { - if (e.code == Coordination::Error::ZNONODE) - continue; - throw; - } - - if (get_response.stat.ephemeralOwner) - continue; - - if (it->path.find("/parts/") != std::string::npos - && !endsWith(it->path, "/columns") - && !endsWith(it->path, "/checksums")) - { - /// The node is related to part. - - /// If it is the part in old format (the node contains children) - convert it to the new format. - if (!children_response.names.empty()) - { - auto part_header = DB::ReplicatedMergeTreePartHeader::fromColumnsAndChecksumsZNodes( - zookeeper.get(it->path + "/columns"), zookeeper.get(it->path + "/checksums")); - - Coordination::Requests ops; - ops.emplace_back(zkutil::makeRemoveRequest(it->path + "/columns", -1)); - ops.emplace_back(zkutil::makeRemoveRequest(it->path + "/checksums", -1)); - ops.emplace_back(zkutil::makeSetRequest(it->path, part_header.toString(), -1)); - - it->set_future = zookeeper.asyncMulti(ops); - } - } - else - { - /// Recursively add children to the queue. - for (const auto & name : children_response.names) - { - std::string child_path = it->path == "/" ? it->path + name : it->path + '/' + name; - nodes_queue.emplace_back( - child_path, zookeeper.asyncGet(child_path), zookeeper.asyncGetChildren(child_path), - &(*it)); - } - } - } - - for (auto & node : nodes_queue) - { - if (node.set_future.valid()) - { - node.set_future.get(); - std::cerr << node.path << " changed!" << std::endl; - } - } -} -catch (...) 
-{ - std::cerr << DB::getCurrentExceptionMessage(true) << '\n'; - throw; -} From e128d89957816877944e37531a4e9acc0ed477b5 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 16 Apr 2022 00:24:58 +0200 Subject: [PATCH 13/47] Remove Arcadia --- src/Disks/DiskWebServer.h | 2 +- .../clickhouse-diagnostics | 2 +- utils/graphite-rollup/rollup-tag-list.xml | 4 +- utils/graphite-rollup/rollup-typed.xml | 4 +- utils/graphite-rollup/rollup.xml | 4 +- utils/release/push_packages | 250 ------------------ utils/s3tools/s3uploader | 4 +- .../main.cpp | 2 +- .../main.cpp | 2 +- utils/zookeeper-dump-tree/main.cpp | 2 +- utils/zookeeper-remove-by-list/main.cpp | 2 +- 11 files changed, 14 insertions(+), 264 deletions(-) delete mode 100755 utils/release/push_packages diff --git a/src/Disks/DiskWebServer.h b/src/Disks/DiskWebServer.h index 6341b582174..98f92fe5986 100644 --- a/src/Disks/DiskWebServer.h +++ b/src/Disks/DiskWebServer.h @@ -20,7 +20,7 @@ namespace ErrorCodes * * * web - * https://clickhouse-datasets.s3.yandex.net/disk-with-static-files-tests/test-hits/ + * https://clickhouse-datasets.s3.amazonaws.com/disk-with-static-files-tests/test-hits/ * * * diff --git a/utils/clickhouse-diagnostics/clickhouse-diagnostics b/utils/clickhouse-diagnostics/clickhouse-diagnostics index 2fe67071c3c..cf65e4efbfb 100755 --- a/utils/clickhouse-diagnostics/clickhouse-diagnostics +++ b/utils/clickhouse-diagnostics/clickhouse-diagnostics @@ -665,7 +665,7 @@ class DiagnosticsData: def _dump_wiki(self): """ - Dump diagnostic data in Yandex wiki format. + Dump diagnostic data in Wiki format. """ def _write_title(buffer, value): diff --git a/utils/graphite-rollup/rollup-tag-list.xml b/utils/graphite-rollup/rollup-tag-list.xml index ef28f2089ad..edab2f16436 100644 --- a/utils/graphite-rollup/rollup-tag-list.xml +++ b/utils/graphite-rollup/rollup-tag-list.xml @@ -1,4 +1,4 @@ - + plain @@ -164,4 +164,4 @@ - + diff --git a/utils/graphite-rollup/rollup-typed.xml b/utils/graphite-rollup/rollup-typed.xml index 0b27d43ece9..ace439dba4a 100644 --- a/utils/graphite-rollup/rollup-typed.xml +++ b/utils/graphite-rollup/rollup-typed.xml @@ -1,4 +1,4 @@ - + plain @@ -164,4 +164,4 @@ - + diff --git a/utils/graphite-rollup/rollup.xml b/utils/graphite-rollup/rollup.xml index 641b0130509..2089605c8bf 100644 --- a/utils/graphite-rollup/rollup.xml +++ b/utils/graphite-rollup/rollup.xml @@ -1,4 +1,4 @@ - + \.sum$ @@ -144,4 +144,4 @@ - + diff --git a/utils/release/push_packages b/utils/release/push_packages deleted file mode 100755 index e25cb325c71..00000000000 --- a/utils/release/push_packages +++ /dev/null @@ -1,250 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -import argparse -import subprocess -import os -import logging -import shutil -import base64 -import pexpect - - -# Do nothing if keys are not provided -class GpgKey(object): - gnupg_dir = os.path.expanduser('~/.gnupg') - TEMPGNUPG_DIR = os.path.expanduser('~/.local/tempgnupg') - - def __init__(self, secret_key_path, public_key_path): - if secret_key_path and public_key_path: - with open(secret_key_path, 'r') as sec, open(public_key_path, 'r') as pub: - self._secret_key = sec.read() - self._public_key = pub.read() - else: - self._secret_key = None - self._public_key = None - - def __enter__(self): - if self._secret_key and self._public_key: - if os.path.exists(self.gnupg_dir): - shutil.move(self.gnupg_dir, self.TEMPGNUPG_DIR) - os.mkdir(self.gnupg_dir) - open(os.path.join(self.gnupg_dir, 'secring.gpg'), 'wb').write(base64.b64decode(self._secret_key)) - 
open(os.path.join(self.gnupg_dir, 'pubring.gpg'), 'wb').write(base64.b64decode(self._public_key)) - return self - - def __exit__(self, exc_type, exc_val, exc_tb): - if self._secret_key and self._public_key: - shutil.rmtree(self.gnupg_dir) - if os.path.exists(self.TEMPGNUPG_DIR): - shutil.move(self.TEMPGNUPG_DIR, self.gnupg_dir) - - -class DebRelease(object): - - DUPLOAD_CONF_TEMPLATE = '\n\t'.join(( - "$cfg{{'{title}'}} = {{", - 'fqdn => "{fqdn}",', - 'method => "{method}",', - 'login => "{login}",', - 'incoming => "{incoming}",', - 'options => "{options}",', - 'dinstall_runs => {dinstall_runs},\n}};',)) - DUPLOAD_CONF_PATH = os.path.expanduser('~/.dupload.conf') - DUPLOAD_CONF_TMP_PATH = os.path.expanduser('~/.local/tmp_dupload.cnf') - - def __init__(self, dupload_config, login, ssh_key_path): - self.__config = {} - for repo, conf in dupload_config.items(): - d = { - "fqdn": conf["fqdn"], - "method": "scpb", - "login": login, - "incoming": conf["incoming"], - "dinstall_runs": 0, - "options": "-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o ConnectionAttempts=3", - } - d.update(conf) - self.__config[repo] = d - print(self.__config) - self.ssh_key_path = ssh_key_path - - def __enter__(self): - if os.path.exists(self.DUPLOAD_CONF_PATH): - shutil.move(self.DUPLOAD_CONF_PATH, self.DUPLOAD_CONF_TMP_PATH) - self.__dupload_conf = open(self.DUPLOAD_CONF_PATH, 'w') - self.__dupload_conf.write('package config;\n\n$default_host = undef;\n\n' + '\n\n'.join([ - self.DUPLOAD_CONF_TEMPLATE.format(title=title, **values) - for title, values in self.__config.items()])) - self.__dupload_conf.write('\n') - self.__dupload_conf.close() - if self.ssh_key_path: - subprocess.check_call("ssh-add {}".format(self.ssh_key_path), shell=True) - return self - - def __exit__(self, exc_type, exc_val, exc_tb): - if os.path.exists(self.DUPLOAD_CONF_TMP_PATH): - shutil.move(self.DUPLOAD_CONF_TMP_PATH, self.DUPLOAD_CONF_PATH) - else: - os.unlink(self.DUPLOAD_CONF_PATH) - - -class SSHConnection(object): - def __init__(self, user, host, ssh_key=None): - if ssh_key: - key_str = "-i {}".format(ssh_key) - else: - key_str = "" - - self.base_cmd = "ssh {key} {user}@{host}".format( - key=key_str, user=user, host=host) - - def execute(self, cmd): - logging.info("Executing remote cmd %s", cmd) - subprocess.check_call(self.base_cmd + ' "{cmd}"'.format(cmd=cmd), - shell=True) - - -def debsign(path, gpg_passphrase, gpg_sec_key_path, gpg_pub_key_path, gpg_user): - try: - with GpgKey(gpg_sec_key_path, gpg_pub_key_path): - cmd = ('debsign -k \'{key}\' -p"gpg --verbose --no-use-agent --batch ' - '--no-tty --passphrase {passphrase}" {path}/*.changes').format( - key=gpg_user, passphrase=gpg_passphrase, path=path) - logging.info("Build debsign cmd '%s'", cmd) - subprocess.check_call(cmd, shell=True) - logging.info("debsign finished") - except Exception as ex: - logging.error("Cannot debsign packages on path %s, with user key", path) - raise ex - -def rpmsign(path, gpg_passphrase, gpg_sec_key_path, gpg_pub_key_path, gpg_user): - try: - with GpgKey(gpg_sec_key_path, gpg_pub_key_path): - for package in os.listdir(path): - package_path = os.path.join(path, package) - logging.info("Signing %s", package_path) - proc = pexpect.spawn('rpm --resign -D "_signature gpg" -D "_gpg_name {username}" {package}'.format(username=gpg_user, package=package_path)) - proc.expect_exact("Enter pass phrase: ") - proc.sendline(gpg_passphrase) - proc.expect(pexpect.EOF) - logging.info("Signed successfully") - except Exception as ex: - logging.error("Cannot 
rpmsign packages on path %s, with user key", path) - raise ex - -def transfer_packages_scp(ssh_key, path, repo_user, repo_url, incoming_directory): - logging.info("Transferring packages via scp to %s", repo_url) - if ssh_key: - key_str = "-i {}".format(ssh_key) - else: - key_str = "" - subprocess.check_call('scp {key_str} {path}/* {user}@{repo}:{incoming}'.format( - path=path, user=repo_user, repo=repo_url, key_str=key_str, incoming=incoming_directory), shell=True) - logging.info("Transfer via scp finished") - -def transfer_packages_dupload(ssh_key, path, repo_user, repo_url, incoming_directory): - repo_short_name = repo_url.split('.')[0] - config = { - repo_short_name: { - "fqdn": repo_url, - "incoming": incoming_directory, - } - } - with DebRelease(config, repo_user, ssh_key): - logging.info("Duploading") - subprocess.check_call("dupload -f --nomail --to {repo} {path}".format(repo=repo_short_name, path=path), shell=True) - logging.info("Dupload finished") - - -def clear_old_incoming_packages(ssh_connection, user): - for pkg in ('deb', 'rpm', 'tgz'): - for release_type in ('stable', 'testing', 'prestable', 'lts'): - try: - ssh_connection.execute("rm /home/{user}/incoming/clickhouse/{pkg}/{release_type}/*".format( - user=user, pkg=pkg, release_type=release_type)) - except Exception: - logging.info("rm is not required") - - -def _get_incoming_path(repo_url, user=None, pkg_type=None, release_type=None): - if repo_url == 'repo.mirror.yandex.net': - return "/home/{user}/incoming/clickhouse/{pkg}/{release_type}".format( - user=user, pkg=pkg_type, release_type=release_type) - else: - return "/repo/{0}/mini-dinstall/incoming/".format(repo_url.split('.')[0]) - - -def _fix_args(args): - - if args.gpg_sec_key_path and not os.path.isabs(args.gpg_sec_key_path): - args.gpg_sec_key_path = os.path.join(os.getcwd(), args.gpg_sec_key_path) - - if args.gpg_pub_key_path and not os.path.isabs(args.gpg_pub_key_path): - args.gpg_pub_key_path = os.path.join(os.getcwd(), args.gpg_pub_key_path) - - if args.ssh_key_path and not os.path.isabs(args.ssh_key_path): - args.ssh_key_path = os.path.join(os.getcwd(), args.ssh_key_path) - - -if __name__ == "__main__": - logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s') - parser = argparse.ArgumentParser(description="Program to push clickhouse packages to repository") - parser.add_argument('--deb-directory') - parser.add_argument('--rpm-directory') - parser.add_argument('--tgz-directory') - parser.add_argument('--release-type', choices=('testing', 'stable', 'prestable', 'lts'), default='testing') - parser.add_argument('--ssh-key-path') - parser.add_argument('--gpg-passphrase', required=True) - parser.add_argument('--gpg-sec-key-path') - parser.add_argument('--gpg-pub-key-path') - parser.add_argument('--gpg-key-user', default='robot-clickhouse') - parser.add_argument('--repo-url', default='repo.mirror.yandex.net') - parser.add_argument('--repo-user', default='buildfarm') - - args = parser.parse_args() - if args.deb_directory is None and args.rpm_directory is None and args.tgz_directory is None: - parser.error('At least one package directory required') - - _fix_args(args) - - is_open_source = args.repo_url == 'repo.mirror.yandex.net' - ssh_connection = SSHConnection(args.repo_user, args.repo_url, args.ssh_key_path) - - packages = [] - if args.deb_directory: - debsign(args.deb_directory, args.gpg_passphrase, args.gpg_sec_key_path, args.gpg_pub_key_path, args.gpg_key_user) - packages.append((args.deb_directory, 'deb')) - - if args.rpm_directory: - if not 
is_open_source: - raise Exception("Cannot upload .rpm package to {}".format(args.repo_url)) - rpmsign(args.rpm_directory, args.gpg_passphrase, args.gpg_sec_key_path, args.gpg_pub_key_path, args.gpg_key_user) - packages.append((args.rpm_directory, 'rpm')) - - if args.tgz_directory: - if not is_open_source: - raise Exception("Cannot upload .tgz package to {}".format(args.repo_url)) - packages.append((args.tgz_directory, 'tgz')) - - if is_open_source: - logging.info("Clearing old directory with incoming packages on buildfarm") - clear_old_incoming_packages(ssh_connection, args.repo_user) - logging.info("Incoming directory cleared") - - for package_path, package_type in packages: - logging.info("Processing path '%s' with package type %s", package_path, package_type) - incoming_directory = _get_incoming_path(args.repo_url, args.repo_user, package_type, args.release_type) - if package_type == "deb": - transfer_packages_dupload(args.ssh_key_path, package_path, args.repo_user, args.repo_url, incoming_directory) - else: - transfer_packages_scp(args.ssh_key_path, package_path, args.repo_user, args.repo_url, incoming_directory) - - logging.info("Running clickhouse install (it takes about (20-30 minutes)") - ssh_connection.execute("sudo /usr/sbin/ya-clickhouse-{0}-install".format(package_type)) - logging.info("Clickhouse installed") - logging.info("Pushing clickhouse to repo") - ssh_connection.execute("/usr/sbin/push2publicrepo.sh clickhouse") - logging.info("Push finished") - logging.info("Package '%s' pushed", package_type) - else: - transfer_packages_dupload(args.ssh_key_path, args.deb_directory, args.repo_user, args.repo_url, _get_incoming_path(args.repo_url)) diff --git a/utils/s3tools/s3uploader b/utils/s3tools/s3uploader index 0627b19ea8f..33db76f57f4 100755 --- a/utils/s3tools/s3uploader +++ b/utils/s3tools/s3uploader @@ -121,8 +121,8 @@ if __name__ == "__main__": parser = argparse.ArgumentParser( description="Simple tool for uploading datasets to clickhouse S3", usage='%(prog)s [options] {}'.format(USAGE_EXAMPLES)) - parser.add_argument('--s3-api-url', default='s3.mds.yandex.net') - parser.add_argument('--s3-common-url', default='s3.yandex.net') + parser.add_argument('--s3-api-url', default='s3.amazonaws.com') + parser.add_argument('--s3-common-url', default='s3.amazonaws.com') parser.add_argument('--bucket-name', default='clickhouse-datasets') parser.add_argument('--dataset-name', required=True, help='Name of dataset, will be used in uploaded path') diff --git a/utils/zookeeper-adjust-block-numbers-to-parts/main.cpp b/utils/zookeeper-adjust-block-numbers-to-parts/main.cpp index 8550675cb9e..5c694ee04ef 100644 --- a/utils/zookeeper-adjust-block-numbers-to-parts/main.cpp +++ b/utils/zookeeper-adjust-block-numbers-to-parts/main.cpp @@ -214,7 +214,7 @@ try po::options_description desc("Allowed options"); desc.add_options() ("help,h", "show help") - ("zookeeper,z", po::value(), "Addresses of ZooKeeper instances, comma-separated. Example: example01e.yandex.ru:2181") + ("zookeeper,z", po::value(), "Addresses of ZooKeeper instances, comma-separated. Example: example01e.clickhouse.com:2181") ("path,p", po::value(), "[optional] Path of replica queue to insert node (without trailing slash). By default it's /clickhouse/tables") ("shard,s", po::value(), "[optional] Shards to process, comma-separated. If not specified then the utility will process all the shards.") ("table,t", po::value(), "[optional] Tables to process, comma-separated. 
If not specified then the utility will process all the tables.") diff --git a/utils/zookeeper-create-entry-to-download-part/main.cpp b/utils/zookeeper-create-entry-to-download-part/main.cpp index 1e86fe248ab..b92857929b7 100644 --- a/utils/zookeeper-create-entry-to-download-part/main.cpp +++ b/utils/zookeeper-create-entry-to-download-part/main.cpp @@ -11,7 +11,7 @@ try desc.add_options() ("help,h", "produce help message") ("address,a", boost::program_options::value()->required(), - "addresses of ZooKeeper instances, comma separated. Example: example01e.yandex.ru:2181") + "addresses of ZooKeeper instances, comma separated. Example: example01e.clickhouse.com:2181") ("path,p", boost::program_options::value()->required(), "path of replica queue to insert node (without trailing slash)") ("name,n", boost::program_options::value()->required(), "name of part to download") ; diff --git a/utils/zookeeper-dump-tree/main.cpp b/utils/zookeeper-dump-tree/main.cpp index 893056564bb..d85762df640 100644 --- a/utils/zookeeper-dump-tree/main.cpp +++ b/utils/zookeeper-dump-tree/main.cpp @@ -14,7 +14,7 @@ int main(int argc, char ** argv) desc.add_options() ("help,h", "produce help message") ("address,a", boost::program_options::value()->required(), - "addresses of ZooKeeper instances, comma separated. Example: example01e.yandex.ru:2181") + "addresses of ZooKeeper instances, comma separated. Example: example01e.clickhouse.com:2181") ("path,p", boost::program_options::value()->default_value("/"), "where to start") ("ctime,c", "print node ctime") diff --git a/utils/zookeeper-remove-by-list/main.cpp b/utils/zookeeper-remove-by-list/main.cpp index 2c97ffb4a70..cf194bd4861 100644 --- a/utils/zookeeper-remove-by-list/main.cpp +++ b/utils/zookeeper-remove-by-list/main.cpp @@ -12,7 +12,7 @@ try desc.add_options() ("help,h", "produce help message") ("address,a", boost::program_options::value()->required(), - "addresses of ZooKeeper instances, comma separated. Example: example01e.yandex.ru:2181") + "addresses of ZooKeeper instances, comma separated. Example: example01e.clickhouse.com:2181") ; boost::program_options::variables_map options; From ccdd0a60f74e5069efd2c73c332ae281f6c374aa Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 16 Apr 2022 00:28:56 +0200 Subject: [PATCH 14/47] Remove Arcadia --- base/base/phdr_cache.cpp | 2 +- base/base/phdr_cache.h | 2 +- base/daemon/BaseDaemon.h | 8 ++++---- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/base/base/phdr_cache.cpp b/base/base/phdr_cache.cpp index 20a755ed7a4..36a9b4f1f62 100644 --- a/base/base/phdr_cache.cpp +++ b/base/base/phdr_cache.cpp @@ -2,7 +2,7 @@ #pragma clang diagnostic ignored "-Wreserved-identifier" #endif -/// This code was based on the code by Fedor Korotkiy (prime@yandex-team.ru) for YT product in Yandex. +/// This code was based on the code by Fedor Korotkiy https://www.linkedin.com/in/fedor-korotkiy-659a1838/ #include diff --git a/base/base/phdr_cache.h b/base/base/phdr_cache.h index d2854ece0bc..b522710c4c4 100644 --- a/base/base/phdr_cache.h +++ b/base/base/phdr_cache.h @@ -1,6 +1,6 @@ #pragma once -/// This code was based on the code by Fedor Korotkiy (prime@yandex-team.ru) for YT product in Yandex. +/// This code was based on the code by Fedor Korotkiy https://www.linkedin.com/in/fedor-korotkiy-659a1838/ /** Collects all dl_phdr_info items and caches them in a static array. 
* Also rewrites dl_iterate_phdr with a lock-free version which consults the above cache diff --git a/base/daemon/BaseDaemon.h b/base/daemon/BaseDaemon.h index 54a74369dce..152a431922c 100644 --- a/base/daemon/BaseDaemon.h +++ b/base/daemon/BaseDaemon.h @@ -76,10 +76,10 @@ public: /// return none if daemon doesn't exist, reference to the daemon otherwise static std::optional> tryGetInstance() { return tryGetInstance(); } - /// В Graphite компоненты пути(папки) разделяются точкой. - /// У нас принят путь формата root_path.hostname_yandex_ru.key - /// root_path по умолчанию one_min - /// key - лучше группировать по смыслу. Например "meminfo.cached" или "meminfo.free", "meminfo.total" + /// A Graphite metric name consists of components separated by dots. + /// We used to have the following format: root_path.hostname_clickhouse_com.key + /// root_path - one_min by default + /// key - it is better to group keys by meaning. Examples: "meminfo.cached" or "meminfo.free", "meminfo.total". template void writeToGraphite(const std::string & key, const T & value, const std::string & config_name = DEFAULT_GRAPHITE_CONFIG_NAME, time_t timestamp = 0, const std::string & custom_root_path = "") { From 40357637ec356ec7e65145424a48743b19c1a4ca Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 16 Apr 2022 00:33:32 +0200 Subject: [PATCH 15/47] Remove Arcadia --- src/Common/tests/gtest_DateLUTImpl.cpp | 5 +- .../gtest_DateTime64_parsing_and_writing.cpp | 122 ------------------ utils/check-style/check-style | 5 - 3 files changed, 2 insertions(+), 130 deletions(-) diff --git a/src/Common/tests/gtest_DateLUTImpl.cpp b/src/Common/tests/gtest_DateLUTImpl.cpp index 1eec83a6ec9..be96c190be3 100644 --- a/src/Common/tests/gtest_DateLUTImpl.cpp +++ b/src/Common/tests/gtest_DateLUTImpl.cpp @@ -90,10 +90,10 @@ TEST(DateLUTTest, makeDayNumTest) TEST(DateLUTTest, TimeValuesInMiddleOfRange) { - const DateLUTImpl & lut = DateLUT::instance("Europe/Minsk"); + const DateLUTImpl & lut = DateLUT::instance("Asia/Istanbul"); const time_t time = 1568650811; // 2019-09-16 19:20:11 (Monday) - EXPECT_EQ(lut.getTimeZone(), "Europe/Minsk"); + EXPECT_EQ(lut.getTimeZone(), "Asia/Istanbul"); EXPECT_EQ(lut.getOffsetAtStartOfEpoch(), 3600*3); // UTC-3 EXPECT_EQ(lut.toDate(time), 1568581200); @@ -538,4 +538,3 @@ INSTANTIATE_TEST_SUITE_P(AllTimezones_Year1970, // {0, 0 + 11 * 3600 * 24 + 12, 11}, })) ); - diff --git a/src/IO/tests/gtest_DateTime64_parsing_and_writing.cpp b/src/IO/tests/gtest_DateTime64_parsing_and_writing.cpp index c6208af2d5e..b2fd78fa8c2 100644 --- a/src/IO/tests/gtest_DateTime64_parsing_and_writing.cpp +++ b/src/IO/tests/gtest_DateTime64_parsing_and_writing.cpp @@ -76,125 +76,3 @@ TEST_P(DateTime64StringParseBestEffortTest, parse) EXPECT_EQ(param.dt64, actual); } - - -// YYYY-MM-DD HH:MM:SS.NNNNNNNNN -INSTANTIATE_TEST_SUITE_P(Basic, - DateTime64StringParseTest, - ::testing::ValuesIn(std::initializer_list{ - { - "When subsecond part is missing from string it is set to zero.", - "2019-09-16 19:20:17", - 1568650817'000, - 3, - DateLUT::instance("Europe/Minsk") - }, - { - "When subsecond part is present in string, but it is zero, it is set to zero.", - "2019-09-16 19:20:17.0", - 1568650817'000, - 3, - DateLUT::instance("Europe/Minsk") - }, - { - "When scale is 0, subsecond part is not set.", - "2019-09-16 19:20:17", - 1568650817ULL, - 0, - DateLUT::instance("Europe/Minsk") - }, - { - "When scale is 0, subsecond part is 0 despite being present in string.", - "2019-09-16 19:20:17.123", - 1568650817ULL, - 0, - DateLUT::instance("Europe/Minsk") -
}, - { - "When subsecond part is present in string, it is set correctly to DateTime64 value of scale 3.", - "2019-09-16 19:20:17.123", - 1568650817'123, - 3, - DateLUT::instance("Europe/Minsk") - }, - { - "When subsecond part is present in string (and begins with 0), it is set correctly to DateTime64 value of scale 3.", - "2019-09-16 19:20:17.012", - 1568650817'012, - 3, - DateLUT::instance("Europe/Minsk") - }, - { - "When subsecond part scale is smaller than DateTime64 scale, subsecond part is properly adjusted (as if padded from right with zeroes).", - "2019-09-16 19:20:17.123", - 1568650817'12300ULL, - 5, - DateLUT::instance("Europe/Minsk") - }, - { - "When subsecond part scale is larger than DateTime64 scale, subsecond part is truncated.", - "2019-09-16 19:20:17.123", - 1568650817'1ULL, - 1, - DateLUT::instance("Europe/Minsk") - } - }) -); - -INSTANTIATE_TEST_SUITE_P(BestEffort, - DateTime64StringParseBestEffortTest, - ::testing::ValuesIn(std::initializer_list{ - { - "When subsecond part is unreasonably large, it truncated to given scale", - "2019-09-16 19:20:17.12345678910111213141516171819202122233435363738393031323334353637383940414243444546474849505152535455565758596061626364", - 1568650817'123456ULL, - 6, - DateLUT::instance("Europe/Minsk") - } - }) -); - - -// TODO: add negative test cases for invalid strings, verifying that error is reported properly - -INSTANTIATE_TEST_SUITE_P(Basic, - DateTime64StringWriteTest, - ::testing::ValuesIn(std::initializer_list{ - { - "non-zero subsecond part on DateTime64 with scale of 3", - "2019-09-16 19:20:17.123", - 1568650817'123, - 3, - DateLUT::instance("Europe/Minsk") - }, - { - "non-zero subsecond part on DateTime64 with scale of 5", - "2019-09-16 19:20:17.12345", - 1568650817'12345ULL, - 5, - DateLUT::instance("Europe/Minsk") - }, - { - "Zero subsecond part is written to string", - "2019-09-16 19:20:17.000", - 1568650817'000ULL, - 3, - DateLUT::instance("Europe/Minsk") - }, - { - "When scale is 0, subsecond part (and separtor) is missing from string", - "2019-09-16 19:20:17", - 1568650817ULL, - 0, - DateLUT::instance("Europe/Minsk") - }, - { - "Subsecond part with leading zeroes is written to string correctly", - "2019-09-16 19:20:17.001", - 1568650817'001ULL, - 3, - DateLUT::instance("Europe/Minsk") - } - }) -); - diff --git a/utils/check-style/check-style b/utils/check-style/check-style index 6ebf53cb932..20954781fbd 100755 --- a/utils/check-style/check-style +++ b/utils/check-style/check-style @@ -147,11 +147,6 @@ find $ROOT_PATH -not -path $ROOT_PATH'/contrib*' \( -name '*.yaml' -or -name '*. grep -vP $EXCLUDE_DIRS | xargs yamllint --config-file=$ROOT_PATH/.yamllint -# Machine translation to Russian is strictly prohibited -find $ROOT_PATH/docs/ru -name '*.md' | - grep -vP $EXCLUDE_DIRS | - xargs grep -l -F 'machine_translated: true' - # Tests should not be named with "fail" in their names. It makes looking at the results less convenient. 
find $ROOT_PATH/tests/queries -iname '*fail*' | grep -vP $EXCLUDE_DIRS | From 69b91c5410dc5d1153be96b2985a5b78462028e4 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 16 Apr 2022 02:08:06 +0200 Subject: [PATCH 16/47] Simplification #36313 --- src/Common/getNumberOfPhysicalCPUCores.cpp | 34 +++++++--------------- 1 file changed, 10 insertions(+), 24 deletions(-) diff --git a/src/Common/getNumberOfPhysicalCPUCores.cpp b/src/Common/getNumberOfPhysicalCPUCores.cpp index f415ee58e85..0df7a83cd53 100644 --- a/src/Common/getNumberOfPhysicalCPUCores.cpp +++ b/src/Common/getNumberOfPhysicalCPUCores.cpp @@ -5,9 +5,6 @@ # include # include #endif -#if USE_CPUID -# include -#endif #include @@ -41,32 +38,21 @@ static unsigned getCGroupLimitedCPUCores(unsigned default_cpu_count) static unsigned getNumberOfPhysicalCPUCoresImpl() { - unsigned cpu_count = 0; // start with an invalid num + unsigned cpu_count = std::thread::hardware_concurrency(); -#if USE_CPUID - cpu_raw_data_t raw_data; - cpu_id_t data; + /// Most x86_64 CPUs have 2-way Hyper-Threading. + /// Aarch64 and RISC-V don't have SMT so far. + /// POWER has SMT, and it can be multi-way (like 8-way), but we don't know how ClickHouse really behaves on it, so we use all of them. - /// On Xen VMs, libcpuid returns wrong info (zero number of cores). Fallback to alternative method. - /// Also, libcpuid does not support some CPUs like AMD Hygon C86 7151. - /// Also, libcpuid gives strange result on Google Compute Engine VMs. - /// Example: - /// num_cores = 12, /// number of physical cores on current CPU socket - /// total_logical_cpus = 1, /// total number of logical cores on all sockets - /// num_logical_cpus = 24. /// number of logical cores on current CPU socket - /// It means two-way hyper-threading (24 / 12), but contradictory, 'total_logical_cpus' == 1. - - if (0 == cpuid_get_raw_data(&raw_data) && 0 == cpu_identify(&raw_data, &data) && data.num_logical_cpus != 0) - cpu_count = data.num_cores * data.total_logical_cpus / data.num_logical_cpus; +#if defined(__x86_64__) + /// Let's limit ourselves to the number of physical cores. + /// But if the number of logical cores is small, it may be a small machine + /// or a very limited cloud instance, and it is reasonable to use all the cores. + if (cpu_count >= 8) + cpu_count /= 2; #endif - /// As a fallback (also for non-x86 architectures) assume there are no hyper-threading on the system. - /// (Actually, only Aarch64 is supported).
- if (cpu_count == 0) - cpu_count = std::thread::hardware_concurrency(); - #if defined(OS_LINUX) - /// TODO: add a setting for disabling that, similar to UseContainerSupport in java cpu_count = getCGroupLimitedCPUCores(cpu_count); #endif From 33ffdcaa4f21f6611eeea8b72c0cf71971baf86d Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 16 Apr 2022 02:18:22 +0200 Subject: [PATCH 17/47] Return two changes --- src/Common/tests/gtest_DateLUTImpl.cpp | 5 +- .../gtest_DateTime64_parsing_and_writing.cpp | 122 ++++++++++++++++++ 2 files changed, 125 insertions(+), 2 deletions(-) diff --git a/src/Common/tests/gtest_DateLUTImpl.cpp b/src/Common/tests/gtest_DateLUTImpl.cpp index be96c190be3..1eec83a6ec9 100644 --- a/src/Common/tests/gtest_DateLUTImpl.cpp +++ b/src/Common/tests/gtest_DateLUTImpl.cpp @@ -90,10 +90,10 @@ TEST(DateLUTTest, makeDayNumTest) TEST(DateLUTTest, TimeValuesInMiddleOfRange) { - const DateLUTImpl & lut = DateLUT::instance("Asia/Istanbul"); + const DateLUTImpl & lut = DateLUT::instance("Europe/Minsk"); const time_t time = 1568650811; // 2019-09-16 19:20:11 (Monday) - EXPECT_EQ(lut.getTimeZone(), "Asia/Istanbul"); + EXPECT_EQ(lut.getTimeZone(), "Europe/Minsk"); EXPECT_EQ(lut.getOffsetAtStartOfEpoch(), 3600*3); // UTC-3 EXPECT_EQ(lut.toDate(time), 1568581200); @@ -538,3 +538,4 @@ INSTANTIATE_TEST_SUITE_P(AllTimezones_Year1970, // {0, 0 + 11 * 3600 * 24 + 12, 11}, })) ); + diff --git a/src/IO/tests/gtest_DateTime64_parsing_and_writing.cpp b/src/IO/tests/gtest_DateTime64_parsing_and_writing.cpp index b2fd78fa8c2..c6208af2d5e 100644 --- a/src/IO/tests/gtest_DateTime64_parsing_and_writing.cpp +++ b/src/IO/tests/gtest_DateTime64_parsing_and_writing.cpp @@ -76,3 +76,125 @@ TEST_P(DateTime64StringParseBestEffortTest, parse) EXPECT_EQ(param.dt64, actual); } + + +// YYYY-MM-DD HH:MM:SS.NNNNNNNNN +INSTANTIATE_TEST_SUITE_P(Basic, + DateTime64StringParseTest, + ::testing::ValuesIn(std::initializer_list{ + { + "When subsecond part is missing from string it is set to zero.", + "2019-09-16 19:20:17", + 1568650817'000, + 3, + DateLUT::instance("Europe/Minsk") + }, + { + "When subsecond part is present in string, but it is zero, it is set to zero.", + "2019-09-16 19:20:17.0", + 1568650817'000, + 3, + DateLUT::instance("Europe/Minsk") + }, + { + "When scale is 0, subsecond part is not set.", + "2019-09-16 19:20:17", + 1568650817ULL, + 0, + DateLUT::instance("Europe/Minsk") + }, + { + "When scale is 0, subsecond part is 0 despite being present in string.", + "2019-09-16 19:20:17.123", + 1568650817ULL, + 0, + DateLUT::instance("Europe/Minsk") + }, + { + "When subsecond part is present in string, it is set correctly to DateTime64 value of scale 3.", + "2019-09-16 19:20:17.123", + 1568650817'123, + 3, + DateLUT::instance("Europe/Minsk") + }, + { + "When subsecond part is present in string (and begins with 0), it is set correctly to DateTime64 value of scale 3.", + "2019-09-16 19:20:17.012", + 1568650817'012, + 3, + DateLUT::instance("Europe/Minsk") + }, + { + "When subsecond part scale is smaller than DateTime64 scale, subsecond part is properly adjusted (as if padded from right with zeroes).", + "2019-09-16 19:20:17.123", + 1568650817'12300ULL, + 5, + DateLUT::instance("Europe/Minsk") + }, + { + "When subsecond part scale is larger than DateTime64 scale, subsecond part is truncated.", + "2019-09-16 19:20:17.123", + 1568650817'1ULL, + 1, + DateLUT::instance("Europe/Minsk") + } + }) +); + +INSTANTIATE_TEST_SUITE_P(BestEffort, + DateTime64StringParseBestEffortTest, + 
::testing::ValuesIn(std::initializer_list{ + { + "When subsecond part is unreasonably large, it is truncated to the given scale", + "2019-09-16 19:20:17.12345678910111213141516171819202122233435363738393031323334353637383940414243444546474849505152535455565758596061626364", + 1568650817'123456ULL, + 6, + DateLUT::instance("Europe/Minsk") + } + }) +); + + +// TODO: add negative test cases for invalid strings, verifying that error is reported properly + +INSTANTIATE_TEST_SUITE_P(Basic, + DateTime64StringWriteTest, + ::testing::ValuesIn(std::initializer_list{ + { + "non-zero subsecond part on DateTime64 with scale of 3", + "2019-09-16 19:20:17.123", + 1568650817'123, + 3, + DateLUT::instance("Europe/Minsk") + }, + { + "non-zero subsecond part on DateTime64 with scale of 5", + "2019-09-16 19:20:17.12345", + 1568650817'12345ULL, + 5, + DateLUT::instance("Europe/Minsk") + }, + { + "Zero subsecond part is written to string", + "2019-09-16 19:20:17.000", + 1568650817'000ULL, + 3, + DateLUT::instance("Europe/Minsk") + }, + { + "When scale is 0, subsecond part (and separator) is missing from string", + "2019-09-16 19:20:17", + 1568650817ULL, + 0, + DateLUT::instance("Europe/Minsk") + }, + { + "Subsecond part with leading zeroes is written to string correctly", + "2019-09-16 19:20:17.001", + 1568650817'001ULL, + 3, + DateLUT::instance("Europe/Minsk") + } + }) +); + From fa46e0d22fb28f62a7125f680c35b7a25edb8b9f Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 16 Apr 2022 02:54:21 +0200 Subject: [PATCH 18/47] Add an option for build profiling --- CMakeLists.txt | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index dad9a25ab26..4f1a6c05730 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -314,6 +314,15 @@ if (ENABLE_BUILD_PATH_MAPPING) set (CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} -ffile-prefix-map=${CMAKE_SOURCE_DIR}=.") endif () +option (ENABLE_BUILD_PROFILING "Enable profiling of build time" OFF) +if (ENABLE_BUILD_PROFILING) + if (COMPILER_CLANG) + set (COMPILER_FLAGS "${COMPILER_FLAGS} -ftime-trace") + else () + message (${RECONFIGURE_MESSAGE_LEVEL} "Build profiling is only available with Clang") + endif () +endif () + if (${CMAKE_VERSION} VERSION_LESS "3.12.4") # CMake < 3.12 doesn't support setting 20 as a C++ standard version. # We will add C++ standard controlling flag in CMAKE_CXX_FLAGS manually for now.
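A usage sketch for the option added in the patch above; the build directory, generator, and trace viewers named below are illustrative assumptions, not part of the patch:

# Hypothetical invocation: configure with build-time profiling enabled, then build as usual.
mkdir -p build && cd build
cmake -DENABLE_BUILD_PROFILING=ON ..
ninja
# With clang's -ftime-trace, each object file gets a sibling .json trace
# that can be opened in a trace viewer such as chrome://tracing or speedscope.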
From 294efeccfe7532fe1c29052b7a908e245a0a76ec Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 16 Apr 2022 01:15:40 +0200 Subject: [PATCH 19/47] Fix clang-tidy-14 (part 1) --- contrib/sysroot | 2 +- src/Access/DiskAccessStorage.cpp | 3 ++- .../AggregateFunctionAvgWeighted.cpp | 4 ++-- src/AggregateFunctions/AggregateFunctionDeltaSum.cpp | 2 +- .../AggregateFunctionGroupArray.cpp | 2 +- .../AggregateFunctionGroupBitmap.cpp | 2 +- .../AggregateFunctionGroupUniqArray.cpp | 2 +- src/AggregateFunctions/AggregateFunctionQuantile.cpp | 4 ++-- src/AggregateFunctions/AggregateFunctionSparkbar.cpp | 2 +- .../AggregateFunctionStatisticsSimple.cpp | 2 +- src/AggregateFunctions/AggregateFunctionSum.cpp | 2 +- src/AggregateFunctions/AggregateFunctionSumCount.cpp | 2 +- src/AggregateFunctions/AggregateFunctionTopK.cpp | 2 +- src/Common/UTF8Helpers.cpp | 2 +- src/Dictionaries/IPAddressDictionary.cpp | 12 ++++++------ src/Functions/FunctionHelpers.cpp | 2 +- src/Functions/FunctionsLogical.cpp | 6 +++--- src/Functions/array/arrayElement.cpp | 2 +- src/Functions/if.cpp | 6 +++--- src/Functions/toLowCardinality.cpp | 1 - src/Interpreters/JIT/compileFunction.cpp | 2 +- src/Interpreters/convertFieldToType.cpp | 12 ++++++------ src/Parsers/Access/ASTShowAccessEntitiesQuery.cpp | 3 ++- src/Storages/FileLog/StorageFileLog.cpp | 2 +- .../MergeTree/MergeTreeDataSelectExecutor.cpp | 2 +- src/Storages/MergeTree/MergeTreeDataWriter.cpp | 2 +- src/Storages/MergeTree/MergeTreeIndexBloomFilter.cpp | 2 +- .../MergeTree/MergeTreeIndexGranuleBloomFilter.cpp | 2 +- src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp | 2 +- 29 files changed, 46 insertions(+), 45 deletions(-) diff --git a/contrib/sysroot b/contrib/sysroot index bbcac834526..e9fb375d0a1 160000 --- a/contrib/sysroot +++ b/contrib/sysroot @@ -1 +1 @@ -Subproject commit bbcac834526d90d1e764164b861be426891d1743 +Subproject commit e9fb375d0a1e5ebfd74c043f088f2342552103f8 diff --git a/src/Access/DiskAccessStorage.cpp b/src/Access/DiskAccessStorage.cpp index 7393fcd8d36..a9eb27c291c 100644 --- a/src/Access/DiskAccessStorage.cpp +++ b/src/Access/DiskAccessStorage.cpp @@ -327,7 +327,8 @@ void DiskAccessStorage::scheduleWriteLists(AccessEntityType type) /// Create the 'need_rebuild_lists.mark' file. /// This file will be used later to find out if writing lists is successful or not. - std::ofstream{getNeedRebuildListsMarkFilePath(directory_path)}; + std::ofstream out{getNeedRebuildListsMarkFilePath(directory_path)}; + out.close(); lists_writing_thread = ThreadFromGlobalPool{&DiskAccessStorage::listsWritingThreadFunc, this}; lists_writing_thread_is_waiting = true; diff --git a/src/AggregateFunctions/AggregateFunctionAvgWeighted.cpp b/src/AggregateFunctions/AggregateFunctionAvgWeighted.cpp index ab6fdc8fd7e..4d7901a7fac 100644 --- a/src/AggregateFunctions/AggregateFunctionAvgWeighted.cpp +++ b/src/AggregateFunctions/AggregateFunctionAvgWeighted.cpp @@ -39,7 +39,7 @@ bool allowTypes(const DataTypePtr& left, const DataTypePtr& right) noexcept } template -static IAggregateFunction * create(const IDataType & second_type, TArgs && ... args) +IAggregateFunction * create(const IDataType & second_type, TArgs && ... args) { const WhichDataType which(second_type); @@ -51,7 +51,7 @@ static IAggregateFunction * create(const IDataType & second_type, TArgs && ... a // Not using helper functions because there are no templates for binary decimal/numeric function. template -static IAggregateFunction * create(const IDataType & first_type, const IDataType & second_type, TArgs && ... 
args) +IAggregateFunction * create(const IDataType & first_type, const IDataType & second_type, TArgs && ... args) { const WhichDataType which(first_type); diff --git a/src/AggregateFunctions/AggregateFunctionDeltaSum.cpp b/src/AggregateFunctions/AggregateFunctionDeltaSum.cpp index f1c6e7c6112..3b43d9a85f8 100644 --- a/src/AggregateFunctions/AggregateFunctionDeltaSum.cpp +++ b/src/AggregateFunctions/AggregateFunctionDeltaSum.cpp @@ -30,7 +30,7 @@ AggregateFunctionPtr createAggregateFunctionDeltaSum( throw Exception("Incorrect number of arguments for aggregate function " + name, ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - DataTypePtr data_type = arguments[0]; + const DataTypePtr & data_type = arguments[0]; if (isInteger(data_type) || isFloat(data_type)) return AggregateFunctionPtr(createWithNumericType( diff --git a/src/AggregateFunctions/AggregateFunctionGroupArray.cpp b/src/AggregateFunctions/AggregateFunctionGroupArray.cpp index 5a9fd778277..85075d5a4d6 100644 --- a/src/AggregateFunctions/AggregateFunctionGroupArray.cpp +++ b/src/AggregateFunctions/AggregateFunctionGroupArray.cpp @@ -20,7 +20,7 @@ namespace { template