From a6254516e0588e43c9ec5ea7711b765458acfedf Mon Sep 17 00:00:00 2001
From: zhongyuankai <872237106@qq.com>
Date: Tue, 11 Jan 2022 09:58:53 +0800
Subject: [PATCH 01/47] Fix ALTER TTL modification on table engines that do not
 support TTL
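
Engines that cannot apply TTL used to accept ALTER queries with a TTL clause,
because validation only looked at the in-memory metadata. validate() now
receives the storage itself and rejects the command early whenever
IStorage::supportsTTL() returns false. A minimal sketch of the new behavior,
mirroring the test added below (the error text comes from the new check):

    CREATE TABLE log_02184 (id UInt64, name String, dt Date) ENGINE = Log();
    ALTER TABLE log_02184 MODIFY COLUMN name String TTL dt + INTERVAL 1 MONTH;
    -- rejected with BAD_ARGUMENTS: Engine Log doesn't support TTL clause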
---
 src/Interpreters/InterpreterAlterQuery.cpp    |  2 +-
 src/Storages/AlterCommands.cpp                |  6 +-
 src/Storages/AlterCommands.h                  |  2 +-
 src/Storages/IStorage.h                       |  3 +
 src/Storages/MergeTree/MergeTreeData.h        |  2 +
 src/Storages/StorageFactory.h                 |  1 +
 .../02184_storage_add_support_ttl.reference   |  0
 .../02184_storage_add_support_ttl.sql         | 59 +++++++++++++++++++
 8 files changed, 72 insertions(+), 3 deletions(-)
 create mode 100644 tests/queries/0_stateless/02184_storage_add_support_ttl.reference
 create mode 100644 tests/queries/0_stateless/02184_storage_add_support_ttl.sql

diff --git a/src/Interpreters/InterpreterAlterQuery.cpp b/src/Interpreters/InterpreterAlterQuery.cpp
index d01f2b05567..bd6839d72d0 100644
--- a/src/Interpreters/InterpreterAlterQuery.cpp
+++ b/src/Interpreters/InterpreterAlterQuery.cpp
@@ -170,7 +170,7 @@ BlockIO InterpreterAlterQuery::executeToTable(const ASTAlterQuery & alter)
     {
         auto alter_lock = table->lockForAlter(getContext()->getSettingsRef().lock_acquire_timeout);
         StorageInMemoryMetadata metadata = table->getInMemoryMetadata();
-        alter_commands.validate(metadata, getContext());
+        alter_commands.validate(table, getContext());
         alter_commands.prepare(metadata);
         table->checkAlterIsPossible(alter_commands, getContext());
         table->alter(alter_commands, getContext(), alter_lock);
diff --git a/src/Storages/AlterCommands.cpp b/src/Storages/AlterCommands.cpp
index 90e10abfa92..e35cf553b63 100644
--- a/src/Storages/AlterCommands.cpp
+++ b/src/Storages/AlterCommands.cpp
@@ -998,8 +998,9 @@ void AlterCommands::prepare(const StorageInMemoryMetadata & metadata)
 }
 
 
-void AlterCommands::validate(const StorageInMemoryMetadata & metadata, ContextPtr context) const
+void AlterCommands::validate(const StoragePtr & table, ContextPtr context) const
 {
+    const StorageInMemoryMetadata & metadata = table->getInMemoryMetadata();
     auto all_columns = metadata.columns;
     /// Default expression for all added/modified columns
     ASTPtr default_expr_list = std::make_shared<ASTExpressionList>();
@@ -1008,6 +1009,9 @@
     {
         const auto & command = (*this)[i];
 
+        if (command.ttl && !table->supportsTTL())
+            throw Exception("Engine " + table->getName() + " doesn't support TTL clause", ErrorCodes::BAD_ARGUMENTS);
+
         const auto & column_name = command.column_name;
         if (command.type == AlterCommand::ADD_COLUMN)
         {
diff --git a/src/Storages/AlterCommands.h b/src/Storages/AlterCommands.h
index dce6b496741..71c622cb9be 100644
--- a/src/Storages/AlterCommands.h
+++ b/src/Storages/AlterCommands.h
@@ -186,7 +186,7 @@ public:
     /// Checks that all columns exist and dependencies between them.
     /// This check is lightweight and based only on metadata.
     /// A more accurate check has to be performed with storage->checkAlterIsPossible.
-    void validate(const StorageInMemoryMetadata & metadata, ContextPtr context) const;
+    void validate(const StoragePtr & table, ContextPtr context) const;
 
     /// Prepare alter commands. Set ignore flag to some of them and set some
     /// parts to commands from storage's metadata (for example, absent default)
diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h
index 6342c3f6b47..99f57ce058a 100644
--- a/src/Storages/IStorage.h
+++ b/src/Storages/IStorage.h
@@ -132,6 +132,9 @@ public:
     /// Returns true if the storage supports insert queries with the PARTITION BY section.
     virtual bool supportsPartitionBy() const { return false; }
 
+    /// Returns true if the storage supports queries with the TTL section.
+    virtual bool supportsTTL() const { return false; }
+
     /// Returns true if the storage supports queries with the PREWHERE section.
     virtual bool supportsPrewhere() const { return false; }
 
diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h
index ebd1950a720..05d5efb0eed 100644
--- a/src/Storages/MergeTree/MergeTreeData.h
+++ b/src/Storages/MergeTree/MergeTreeData.h
@@ -414,6 +414,8 @@ public:
 
     bool supportsSubcolumns() const override { return true; }
 
+    bool supportsTTL() const override { return true; }
+
     NamesAndTypesList getVirtuals() const override;
 
     bool mayBenefitFromIndexForIn(const ASTPtr & left_in_operand, ContextPtr, const StorageMetadataPtr & metadata_snapshot) const override;
diff --git a/src/Storages/StorageFactory.h b/src/Storages/StorageFactory.h
index 6ffa6327176..77309541374 100644
--- a/src/Storages/StorageFactory.h
+++ b/src/Storages/StorageFactory.h
@@ -59,6 +59,7 @@ public:
         bool supports_skipping_indices = false;
         bool supports_projections = false;
         bool supports_sort_order = false;
+        /// See also IStorage::supportsTTL()
         bool supports_ttl = false;
         /// See also IStorage::supportsReplication()
         bool supports_replication = false;
diff --git a/tests/queries/0_stateless/02184_storage_add_support_ttl.reference b/tests/queries/0_stateless/02184_storage_add_support_ttl.reference
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/tests/queries/0_stateless/02184_storage_add_support_ttl.sql b/tests/queries/0_stateless/02184_storage_add_support_ttl.sql
new file mode 100644
index 00000000000..dca8ee3105c
--- /dev/null
+++ b/tests/queries/0_stateless/02184_storage_add_support_ttl.sql
@@ -0,0 +1,59 @@
+DROP TABLE IF EXISTS mergeTree_02184;
+CREATE TABLE mergeTree_02184 (id UInt64, name String, dt Date) Engine=MergeTree ORDER BY id;
+ALTER TABLE mergeTree_02184 MODIFY COLUMN name String TTL dt + INTERVAL 1 MONTH;
+DETACH TABLE mergeTree_02184;
+ATTACH TABLE mergeTree_02184;
+
+DROP TABLE IF EXISTS distributed_02184;
+CREATE TABLE distributed_02184 (id UInt64, name String, dt Date) Engine=Distributed('test_cluster_two_shards', 'default', 'mergeTree_02184', rand());
+ALTER TABLE distributed_02184 MODIFY COLUMN name String TTL dt + INTERVAL 1 MONTH; -- { serverError BAD_ARGUMENTS }
+DETACH TABLE distributed_02184;
+ATTACH TABLE distributed_02184;
+
+DROP TABLE IF EXISTS buffer_02184;
+CREATE TABLE buffer_02184 (id UInt64, name String, dt Date) ENGINE = Buffer(default, mergeTree_02184, 16, 10, 100, 10000, 1000000, 10000000, 100000000);
+ALTER TABLE buffer_02184 MODIFY COLUMN name String TTL dt + INTERVAL 1 MONTH; -- { serverError BAD_ARGUMENTS }
+DETACH TABLE buffer_02184;
+ATTACH TABLE buffer_02184;
+
+DROP TABLE IF EXISTS merge_02184;
+CREATE TABLE merge_02184 (id UInt64, name String, dt Date) ENGINE = Merge('default', 'distributed_02184');
+ALTER TABLE merge_02184 MODIFY COLUMN name String TTL dt + INTERVAL 1 MONTH; -- { serverError BAD_ARGUMENTS }
+DETACH TABLE merge_02184;
+ATTACH TABLE merge_02184;
+
+DROP TABLE IF EXISTS null_02184;
+CREATE TABLE null_02184 AS system.one Engine=Null();
+ALTER TABLE null_02184 MODIFY COLUMN dummy Int TTL now() + INTERVAL 1 MONTH; -- { serverError BAD_ARGUMENTS }
+DETACH TABLE null_02184;
+ATTACH TABLE null_02184;
+
+DROP TABLE IF EXISTS file_02184;
+CREATE TABLE file_02184 (id UInt64, name String, dt Date) ENGINE = File(TabSeparated);
+ALTER TABLE file_02184 MODIFY COLUMN name String TTL dt + INTERVAL 1 MONTH; -- { serverError BAD_ARGUMENTS }
+DETACH TABLE file_02184;
+ATTACH TABLE file_02184;
+
+DROP TABLE IF EXISTS memory_02184;
+CREATE TABLE memory_02184 (id UInt64, name String, dt Date) ENGINE = Memory();
+ALTER TABLE memory_02184 MODIFY COLUMN name String TTL dt + INTERVAL 1 MONTH; -- { serverError BAD_ARGUMENTS }
+DETACH TABLE memory_02184;
+ATTACH TABLE memory_02184;
+
+DROP TABLE IF EXISTS log_02184;
+CREATE TABLE log_02184 (id UInt64, name String, dt Date) ENGINE = Log();
+ALTER TABLE log_02184 MODIFY COLUMN name String TTL dt + INTERVAL 1 MONTH; -- { serverError BAD_ARGUMENTS }
+DETACH TABLE log_02184;
+ATTACH TABLE log_02184;
+
+DROP TABLE IF EXISTS tiny_log_02184;
+CREATE TABLE tiny_log_02184 (id UInt64, name String, dt Date) ENGINE = TinyLog();
+ALTER TABLE tiny_log_02184 MODIFY COLUMN name String TTL dt + INTERVAL 1 MONTH; -- { serverError BAD_ARGUMENTS }
+DETACH TABLE tiny_log_02184;
+ATTACH TABLE tiny_log_02184;
+
+DROP TABLE IF EXISTS stripe_log_02184;
+CREATE TABLE stripe_log_02184 (id UInt64, name String, dt Date) ENGINE = StripeLog;
+ALTER TABLE stripe_log_02184 MODIFY COLUMN name String TTL dt + INTERVAL 1 MONTH; -- { serverError BAD_ARGUMENTS }
+DETACH TABLE stripe_log_02184;
+ATTACH TABLE stripe_log_02184;

From 44b221fd22f9d8c0fa6f563d80e1dfafbf6726a8 Mon Sep 17 00:00:00 2001
From: Thom O'Connor
Date: Fri, 8 Apr 2022 10:53:08 -0600
Subject: [PATCH 02/47] Updated adopters: added 4 new adopters

---
 docs/en/introduction/adopters.md | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/docs/en/introduction/adopters.md b/docs/en/introduction/adopters.md
index 98eea85bbfa..e4d60a4fc9e 100644
--- a/docs/en/introduction/adopters.md
+++ b/docs/en/introduction/adopters.md
@@ -28,6 +28,7 @@ toc_title: Adopters
 | Badoo | Dating | Timeseries | — | 1.6 mln events/sec (2018) | [Slides in Russian, December 2019](https://presentations.clickhouse.com/meetup38/forecast.pdf) |
 | Beeline | Telecom | Data Platform | — | — | [Blog post, July 2021](https://habr.com/en/company/beeline/blog/567508/) |
 | Benocs | Network Telemetry and Analytics | Main Product | — | — | [Slides in English, October 2017](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup9/lpm.pdf) |
+| Better Stack | Cloud, SaaS | Log Management | — | — | [Official Website](https://betterstack.com/logtail) |
 | BIGO | Video | Computing Platform | — | — | [Blog Article, August 2020](https://www.programmersought.com/article/44544895251/) |
 | BiliBili | Video sharing | — | — | — | [Blog post, June 2021](https://chowdera.com/2021/06/20210622012241476b.html) |
 | Bloomberg | Finance, Media | Monitoring | — | — | [Job opening, September 2021](https://careers.bloomberg.com/job/detail/94913), [slides, May 2018](https://www.slideshare.net/Altinity/http-analytics-for-6m-requests-per-second-using-clickhouse-by-alexander-bocharov) |
@@ -112,7 +113,7 @@ toc_title: Adopters
 | NLMK | Steel | Monitoring | — | — | [Article in Russian, Jan 2022](https://habr.com/en/company/nlmk/blog/645943/) |
 | NOC Project | Network Monitoring | Analytics | Main Product | — | [Official Website](https://getnoc.com/features/big-data/) |
 | Noction | Network Technology | Main Product | — | — | [Official Website](https://www.noction.com/news/irp-3-11-remote-triggered-blackholing-capability)
-| ntop | Network Monitoning | Monitoring | — | — | [Official website, Jan 2022](https://www.ntop.org/ntop/historical-traffic-analysis-at-scale-using-clickhouse-with-ntopng/) |
+| ntop | Network Monitoring | Monitoring | — | — | [Official website, January 2022](https://www.ntop.org/ntop/historical-traffic-analysis-at-scale-using-clickhouse-with-ntopng/) |
 | Nuna Inc. | Health Data Analytics | — | — | — | [Talk in English, July 2020](https://youtu.be/GMiXCMFDMow?t=170) |
 | Ok.ru | Social Network | — | 72 servers | 810 TB compressed, 50bn rows/day, 1.5 TB/day | [SmartData conference, October 2021](https://assets.ctfassets.net/oxjq45e8ilak/4JPHkbJenLgZhBGGyyonFP/57472ec6987003ec4078d0941740703b/____________________ClickHouse_______________________.pdf) |
 | Omnicomm | Transportation Monitoring | — | — | — | [Facebook post, October 2021](https://www.facebook.com/OmnicommTeam/posts/2824479777774500) |
@@ -123,6 +124,7 @@ toc_title: Adopters
 | Panelbear | Analytics | Monitoring and Analytics | — | — | [Tech Stack, November 2020](https://panelbear.com/blog/tech-stack/) |
 | Percent 百分点 | Analytics | Main Product | — | — | [Slides in Chinese, June 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup24/4.%20ClickHouse万亿数据双中心的设计与实践%20.pdf) |
 | Percona | Performance analysis | Percona Monitoring and Management | — | — | [Official website, Mar 2020](https://www.percona.com/blog/2020/03/30/advanced-query-analysis-in-percona-monitoring-and-management-with-direct-clickhouse-access/) |
+| PingCAP | Analytics | Real-Time Transactional and Analytical Processing | — | — | [GitHub, TiFlash/TiDB](https://github.com/pingcap/tiflash) |
 | Plausible | Analytics | Main Product | — | — | [Blog post, June 2020](https://twitter.com/PlausibleHQ/status/1273889629087969280) |
 | PostHog | Product Analytics | Main Product | — | — | [Release Notes, October 2020](https://posthog.com/blog/the-posthog-array-1-15-0), [Blog, November 2021](https://posthog.com/blog/how-we-turned-clickhouse-into-our-eventmansion) |
 | Postmates | Delivery | — | — | — | [Talk in English, July 2020](https://youtu.be/GMiXCMFDMow?t=188) |
@@ -159,6 +161,7 @@ toc_title: Adopters
 | Suning | E-Commerce | User behaviour analytics | — | — | [Blog article](https://www.sohu.com/a/434152235_411876) |
 | Superwall | Monetization Tooling | Main product | — | — | [Word of mouth, Jan 2022](https://github.com/ClickHouse/ClickHouse/pull/33573) |
 | Swetrix | Analytics | Main Product | — | — | [Source code](https://github.com/swetrix/swetrix-api) |
+| Synpse | Application Management | Main Product | — | — | [Tweet, January 2022](https://twitter.com/KRusenas/status/1483571168363880455) |
 | Teralytics | Mobility | Analytics | — | — | [Tech blog](https://www.teralytics.net/knowledge-hub/visualizing-mobility-data-the-scalability-challenge) |
 | Tencent | Big Data | Data processing | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/5.%20ClickHouse大数据集群应用_李俊飞腾讯网媒事业部.pdf) |
 | Tencent | Messaging | Logging | — | — | [Talk in Chinese, November 2019](https://youtu.be/T-iVQRuw-QY?t=5050) |
@@ -172,6 +175,7 @@ toc_title: Adopters
 | UTMSTAT | Analytics | Main product | — | — | [Blog post, June 2020](https://vc.ru/tribuna/133956-striming-dannyh-iz-servisa-skvoznoy-analitiki-v-clickhouse) |
 |
Vercel | Traffic and Performance Analytics | — | — | — | Direct reference, October 2021 | | VKontakte | Social Network | Statistics, Logging | — | — | [Slides in Russian, August 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup17/3_vk.pdf) | +| VKontech | Distributed Systems | Migrating from MongoDB | - | - | [Blog, January 2022](https://vkontech.com/migrating-your-reporting-queries-from-a-general-purpose-db-mongodb-to-a-data-warehouse-clickhouse-performance-overview/) | | VMware | Cloud | VeloCloud, SDN | — | — | [Product documentation](https://docs.vmware.com/en/vRealize-Operations-Manager/8.3/com.vmware.vcom.metrics.doc/GUID-A9AD72E1-C948-4CA2-971B-919385AB3CA8.html) | | Walmart Labs | Internet, Retail | — | — | — | [Talk in English, July 2020](https://youtu.be/GMiXCMFDMow?t=144) | | Wargaming | Games | | — | — | [Interview](https://habr.com/en/post/496954/) | From ae88549c4f330712962fcf805d6bb8680fee6bb4 Mon Sep 17 00:00:00 2001 From: avogar Date: Wed, 13 Apr 2022 20:02:52 +0000 Subject: [PATCH 03/47] Allow insert select for files with formats without schema inference --- src/Formats/ReadSchemaUtils.cpp | 2 +- src/TableFunctions/ITableFunctionFileLike.cpp | 23 ------------------- ..._format_without_schema_inference.reference | 1 + ...t_with_format_without_schema_inference.sql | 2 ++ 4 files changed, 4 insertions(+), 24 deletions(-) create mode 100644 tests/queries/0_stateless/02269_insert_select_with_format_without_schema_inference.reference create mode 100644 tests/queries/0_stateless/02269_insert_select_with_format_without_schema_inference.sql diff --git a/src/Formats/ReadSchemaUtils.cpp b/src/Formats/ReadSchemaUtils.cpp index 3e88b51152d..ccb017a60ac 100644 --- a/src/Formats/ReadSchemaUtils.cpp +++ b/src/Formats/ReadSchemaUtils.cpp @@ -94,7 +94,7 @@ ColumnsDescription readSchemaFromFormat( } } else - throw Exception(ErrorCodes::BAD_ARGUMENTS, "{} file format doesn't support schema inference", format_name); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "{} file format doesn't support schema inference. 
You must specify the structure manually", format_name); return ColumnsDescription(names_and_types); } diff --git a/src/TableFunctions/ITableFunctionFileLike.cpp b/src/TableFunctions/ITableFunctionFileLike.cpp index 3388a7ec9f6..cf4a62a30f4 100644 --- a/src/TableFunctions/ITableFunctionFileLike.cpp +++ b/src/TableFunctions/ITableFunctionFileLike.cpp @@ -25,24 +25,6 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; } -namespace -{ - void checkIfFormatSupportsAutoStructure(const String & name, const String & format) - { - if (name == "file" && format == "Distributed") - return; - - if (FormatFactory::instance().checkIfFormatHasAnySchemaReader(format)) - return; - - throw Exception( - "Table function '" + name - + "' allows automatic structure determination only for formats that support schema inference and for Distributed format in table function " - "'file'", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - } -} - void ITableFunctionFileLike::parseArguments(const ASTPtr & ast_function, ContextPtr context) { /// Parse args @@ -68,18 +50,13 @@ void ITableFunctionFileLike::parseArguments(const ASTPtr & ast_function, Context format = FormatFactory::instance().getFormatFromFileName(filename, true); if (args.size() <= 2) - { - checkIfFormatSupportsAutoStructure(getName(), format); return; - } if (args.size() != 3 && args.size() != 4) throw Exception("Table function '" + getName() + "' requires 1, 2, 3 or 4 arguments: filename, format (default auto), structure (default auto) and compression method (default auto)", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); structure = args[2]->as().value.safeGet(); - if (structure == "auto") - checkIfFormatSupportsAutoStructure(getName(), format); if (structure.empty()) throw Exception(ErrorCodes::BAD_ARGUMENTS, diff --git a/tests/queries/0_stateless/02269_insert_select_with_format_without_schema_inference.reference b/tests/queries/0_stateless/02269_insert_select_with_format_without_schema_inference.reference new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/tests/queries/0_stateless/02269_insert_select_with_format_without_schema_inference.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/02269_insert_select_with_format_without_schema_inference.sql b/tests/queries/0_stateless/02269_insert_select_with_format_without_schema_inference.sql new file mode 100644 index 00000000000..8ea9dba2696 --- /dev/null +++ b/tests/queries/0_stateless/02269_insert_select_with_format_without_schema_inference.sql @@ -0,0 +1,2 @@ +insert into function file('02269_data', 'RowBinary') select 1; +select * from file('02269_data', 'RowBinary', 'x UInt8'); From 31f469a8749de3e0cce5529d391547e473485b9c Mon Sep 17 00:00:00 2001 From: avogar Date: Thu, 14 Apr 2022 10:18:55 +0000 Subject: [PATCH 04/47] Split test 01675_data_type_coroutine into 2 tests to prevent possible timeouts --- .../01675_data_type_coroutine.reference | 1 - .../0_stateless/01675_data_type_coroutine.sh | 12 ------------ .../01675_data_type_coroutine_2.reference | 1 + .../01675_data_type_coroutine_2.sh | 19 +++++++++++++++++++ 4 files changed, 20 insertions(+), 13 deletions(-) create mode 100644 tests/queries/0_stateless/01675_data_type_coroutine_2.reference create mode 100755 tests/queries/0_stateless/01675_data_type_coroutine_2.sh diff --git a/tests/queries/0_stateless/01675_data_type_coroutine.reference b/tests/queries/0_stateless/01675_data_type_coroutine.reference index 541dab48def..7326d960397 100644 --- a/tests/queries/0_stateless/01675_data_type_coroutine.reference +++ 
b/tests/queries/0_stateless/01675_data_type_coroutine.reference @@ -1,2 +1 @@ Ok -Ok diff --git a/tests/queries/0_stateless/01675_data_type_coroutine.sh b/tests/queries/0_stateless/01675_data_type_coroutine.sh index 9f7d5401bd2..4106d0d7f73 100755 --- a/tests/queries/0_stateless/01675_data_type_coroutine.sh +++ b/tests/queries/0_stateless/01675_data_type_coroutine.sh @@ -17,15 +17,3 @@ while [[ $counter -lt $retries ]]; do done echo 'Ok' - -counter=0 -I=0 -while [[ $counter -lt $retries ]]; do - I=$((I + 1)) - TYPE=$(perl -e "print 'Array(' x $I; print 'UInt8'; print ')' x $I") - ${CLICKHOUSE_CLIENT} --prefer_localhost_replica=0 --max_parser_depth 1000000 --query "SELECT * FROM remote('127.0.0.{1,2}', generateRandom('x $TYPE', 1, 1, 1)) LIMIT 1 FORMAT Null" 2>&1 | grep -q -F 'Maximum parse depth' && break; - ((++counter)) -done - -#echo "I = ${I}" -echo 'Ok' diff --git a/tests/queries/0_stateless/01675_data_type_coroutine_2.reference b/tests/queries/0_stateless/01675_data_type_coroutine_2.reference new file mode 100644 index 00000000000..7326d960397 --- /dev/null +++ b/tests/queries/0_stateless/01675_data_type_coroutine_2.reference @@ -0,0 +1 @@ +Ok diff --git a/tests/queries/0_stateless/01675_data_type_coroutine_2.sh b/tests/queries/0_stateless/01675_data_type_coroutine_2.sh new file mode 100755 index 00000000000..501b9d4ab12 --- /dev/null +++ b/tests/queries/0_stateless/01675_data_type_coroutine_2.sh @@ -0,0 +1,19 @@ +#!/usr/bin/env bash +# Tags: long + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + + +counter=0 retries=60 +I=0 +while [[ $counter -lt $retries ]]; do + I=$((I + 1)) + TYPE=$(perl -e "print 'Array(' x $I; print 'UInt8'; print ')' x $I") + ${CLICKHOUSE_CLIENT} --prefer_localhost_replica=0 --max_parser_depth 1000000 --query "SELECT * FROM remote('127.0.0.{1,2}', generateRandom('x $TYPE', 1, 1, 1)) LIMIT 1 FORMAT Null" 2>&1 | grep -q -F 'Maximum parse depth' && break; + ((++counter)) +done + +#echo "I = ${I}" +echo 'Ok' From 428753a4a77135ad76a2b5f76f0b9c38bb5e74b6 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 14 Apr 2022 14:39:36 +0200 Subject: [PATCH 05/47] Fix integration tests parser --- tests/integration/ci-runner.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/integration/ci-runner.py b/tests/integration/ci-runner.py index 05e56d2a910..a047c6c114c 100755 --- a/tests/integration/ci-runner.py +++ b/tests/integration/ci-runner.py @@ -461,6 +461,11 @@ class ClickhouseIntegrationTestsRunner: if test not in main_counters[state]: main_counters[state].append(test) + for state in ("SKIPPED",): + for test in current_counters[state]: + main_counters[state].append(test) + + def _get_runner_image_cmd(self, repo_path): image_cmd = "" if self._can_run_with( From c74b6c547b6638648e1df14a6dd5478d993c5333 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 14 Apr 2022 14:40:56 +0200 Subject: [PATCH 06/47] Black --- tests/integration/ci-runner.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/integration/ci-runner.py b/tests/integration/ci-runner.py index a047c6c114c..687c88b04a3 100755 --- a/tests/integration/ci-runner.py +++ b/tests/integration/ci-runner.py @@ -465,7 +465,6 @@ class ClickhouseIntegrationTestsRunner: for test in current_counters[state]: main_counters[state].append(test) - def _get_runner_image_cmd(self, repo_path): image_cmd = "" if self._can_run_with( From e22a175afb1b0415895ddb195b9a29566802bc09 Mon Sep 17 00:00:00 2001 From: zhanglistar Date: Fri, 15 Apr 2022 
14:04:30 +0800 Subject: [PATCH 07/47] Fix ReadBufferFromHDFS crash in debug mode --- src/Storages/HDFS/ReadBufferFromHDFS.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/HDFS/ReadBufferFromHDFS.cpp b/src/Storages/HDFS/ReadBufferFromHDFS.cpp index 1bafa49e55b..58666d0f7f1 100644 --- a/src/Storages/HDFS/ReadBufferFromHDFS.cpp +++ b/src/Storages/HDFS/ReadBufferFromHDFS.cpp @@ -137,7 +137,7 @@ std::optional ReadBufferFromHDFS::getTotalSize() bool ReadBufferFromHDFS::nextImpl() { impl->position() = impl->buffer().begin() + offset(); - auto result = impl->next(); + auto result = impl->nextImpl(); if (result) BufferBase::set(impl->buffer().begin(), impl->buffer().size(), impl->offset); /// use the buffer returned by `impl` From 58fc4e033ae143af6a5802d89787e25dad2c2e8e Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 15 Apr 2022 22:54:08 +0200 Subject: [PATCH 08/47] Remove trash --- programs/server/Server.cpp | 1 + src/Common/getNumberOfPhysicalCPUCores.cpp | 94 ++++++++++------------ 2 files changed, 42 insertions(+), 53 deletions(-) diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index fc9187cb622..0120564e0b1 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -1716,6 +1716,7 @@ int Server::main(const std::vector & /*args*/) return Application::EXIT_OK; } + void Server::createServers( Poco::Util::AbstractConfiguration & config, const std::vector & listen_hosts, diff --git a/src/Common/getNumberOfPhysicalCPUCores.cpp b/src/Common/getNumberOfPhysicalCPUCores.cpp index 1d26b522e64..872a2464eb6 100644 --- a/src/Common/getNumberOfPhysicalCPUCores.cpp +++ b/src/Common/getNumberOfPhysicalCPUCores.cpp @@ -12,75 +12,63 @@ #include #if defined(OS_LINUX) -unsigned getCGroupLimitedCPUCores(unsigned default_cpu_count) +static int readFrom(const char * filename, int default_value) { - // Try to look at cgroups limit if it is available. - auto read_from = [](const char * filename, int default_value) -> int { - std::ifstream infile(filename); - if (!infile.is_open()) - { - return default_value; - } - int idata; - if (infile >> idata) - return idata; - else - return default_value; - }; + std::ifstream infile(filename); + if (!infile.is_open()) + return default_value; + int idata; + if (infile >> idata) + return idata; + else + return default_value; +} +/// Try to look at cgroups limit if it is available. +static unsigned getCGroupLimitedCPUCores(unsigned default_cpu_count) +{ unsigned quota_count = default_cpu_count; - // Return the number of milliseconds per period process is guaranteed to run. - // -1 for no quota - int cgroup_quota = read_from("/sys/fs/cgroup/cpu/cpu.cfs_quota_us", -1); - int cgroup_period = read_from("/sys/fs/cgroup/cpu/cpu.cfs_period_us", -1); + /// Return the number of milliseconds per period process is guaranteed to run. 
+ /// -1 for no quota + int cgroup_quota = readFrom("/sys/fs/cgroup/cpu/cpu.cfs_quota_us", -1); + int cgroup_period = readFrom("/sys/fs/cgroup/cpu/cpu.cfs_period_us", -1); if (cgroup_quota > -1 && cgroup_period > 0) - { quota_count = ceil(static_cast(cgroup_quota) / static_cast(cgroup_period)); - } return std::min(default_cpu_count, quota_count); } -#endif // OS_LINUX +#endif unsigned getNumberOfPhysicalCPUCores() { - static const unsigned number = [] { - unsigned cpu_count = 0; // start with an invalid num + unsigned cpu_count = 0; // start with an invalid num + #if USE_CPUID - do - { - cpu_raw_data_t raw_data; - cpu_id_t data; + cpu_raw_data_t raw_data; + cpu_id_t data; - /// On Xen VMs, libcpuid returns wrong info (zero number of cores). Fallback to alternative method. - /// Also, libcpuid does not support some CPUs like AMD Hygon C86 7151. - if (0 != cpuid_get_raw_data(&raw_data) || 0 != cpu_identify(&raw_data, &data) || data.num_logical_cpus == 0) - { - // Just fallback - break; - } + /// On Xen VMs, libcpuid returns wrong info (zero number of cores). Fallback to alternative method. + /// Also, libcpuid does not support some CPUs like AMD Hygon C86 7151. + /// Also, libcpuid gives strange result on Google Compute Engine VMs. + /// Example: + /// num_cores = 12, /// number of physical cores on current CPU socket + /// total_logical_cpus = 1, /// total number of logical cores on all sockets + /// num_logical_cpus = 24. /// number of logical cores on current CPU socket + /// It means two-way hyper-threading (24 / 12), but contradictory, 'total_logical_cpus' == 1. - cpu_count = data.num_cores * data.total_logical_cpus / data.num_logical_cpus; - - /// Also, libcpuid gives strange result on Google Compute Engine VMs. - /// Example: - /// num_cores = 12, /// number of physical cores on current CPU socket - /// total_logical_cpus = 1, /// total number of logical cores on all sockets - /// num_logical_cpus = 24. /// number of logical cores on current CPU socket - /// It means two-way hyper-threading (24 / 12), but contradictory, 'total_logical_cpus' == 1. - } while (false); + if (0 == cpuid_get_raw_data(&raw_data) && 0 == cpu_identify(&raw_data, &data) && data.num_logical_cpus != 0) + cpu_count = data.num_cores * data.total_logical_cpus / data.num_logical_cpus; #endif - /// As a fallback (also for non-x86 architectures) assume there are no hyper-threading on the system. - /// (Actually, only Aarch64 is supported). - if (cpu_count == 0) - cpu_count = std::thread::hardware_concurrency(); + /// As a fallback (also for non-x86 architectures) assume there are no hyper-threading on the system. + /// (Actually, only Aarch64 is supported). 
+ if (cpu_count == 0) + cpu_count = std::thread::hardware_concurrency(); #if defined(OS_LINUX) - /// TODO: add a setting for disabling that, similar to UseContainerSupport in java - cpu_count = getCGroupLimitedCPUCores(cpu_count); -#endif // OS_LINUX - return cpu_count; - }(); - return number; + /// TODO: add a setting for disabling that, similar to UseContainerSupport in java + cpu_count = getCGroupLimitedCPUCores(cpu_count); +#endif + + return cpu_count; } From 364f10aff5f0b11a45aeb24eb933f13a87fdef5d Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 15 Apr 2022 22:56:51 +0200 Subject: [PATCH 09/47] Remove trash --- src/Common/getNumberOfPhysicalCPUCores.cpp | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/Common/getNumberOfPhysicalCPUCores.cpp b/src/Common/getNumberOfPhysicalCPUCores.cpp index 872a2464eb6..f415ee58e85 100644 --- a/src/Common/getNumberOfPhysicalCPUCores.cpp +++ b/src/Common/getNumberOfPhysicalCPUCores.cpp @@ -39,7 +39,7 @@ static unsigned getCGroupLimitedCPUCores(unsigned default_cpu_count) } #endif -unsigned getNumberOfPhysicalCPUCores() +static unsigned getNumberOfPhysicalCPUCoresImpl() { unsigned cpu_count = 0; // start with an invalid num @@ -72,3 +72,10 @@ unsigned getNumberOfPhysicalCPUCores() return cpu_count; } + +unsigned getNumberOfPhysicalCPUCores() +{ + /// Calculate once. + static auto res = getNumberOfPhysicalCPUCoresImpl(); + return res; +} From 416fa95b8744825ec1c00c072b20bd404145198a Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 15 Apr 2022 23:57:09 +0200 Subject: [PATCH 10/47] Remove "Arcadia" build system --- src/Disks/AzureBlobStorage/AzureBlobStorageAuth.h | 2 -- src/Disks/AzureBlobStorage/registerDiskAzureBlobStorage.cpp | 2 -- src/IO/ReadBufferFromAzureBlobStorage.cpp | 2 -- src/IO/ReadBufferFromAzureBlobStorage.h | 2 -- src/IO/WriteBufferFromAzureBlobStorage.cpp | 2 -- src/IO/WriteBufferFromAzureBlobStorage.h | 2 -- src/Server/CertificateReloader.h | 4 +--- 7 files changed, 1 insertion(+), 15 deletions(-) diff --git a/src/Disks/AzureBlobStorage/AzureBlobStorageAuth.h b/src/Disks/AzureBlobStorage/AzureBlobStorageAuth.h index 1cef6105d41..048daa7c9dc 100644 --- a/src/Disks/AzureBlobStorage/AzureBlobStorageAuth.h +++ b/src/Disks/AzureBlobStorage/AzureBlobStorageAuth.h @@ -1,8 +1,6 @@ #pragma once -#if !defined(ARCADIA_BUILD) #include -#endif #if USE_AZURE_BLOB_STORAGE diff --git a/src/Disks/AzureBlobStorage/registerDiskAzureBlobStorage.cpp b/src/Disks/AzureBlobStorage/registerDiskAzureBlobStorage.cpp index 243452353d3..128c7534b3c 100644 --- a/src/Disks/AzureBlobStorage/registerDiskAzureBlobStorage.cpp +++ b/src/Disks/AzureBlobStorage/registerDiskAzureBlobStorage.cpp @@ -1,6 +1,4 @@ -#if !defined(ARCADIA_BUILD) #include -#endif #include diff --git a/src/IO/ReadBufferFromAzureBlobStorage.cpp b/src/IO/ReadBufferFromAzureBlobStorage.cpp index 0ce6db97437..593bd01023a 100644 --- a/src/IO/ReadBufferFromAzureBlobStorage.cpp +++ b/src/IO/ReadBufferFromAzureBlobStorage.cpp @@ -1,6 +1,4 @@ -#if !defined(ARCADIA_BUILD) #include -#endif #if USE_AZURE_BLOB_STORAGE diff --git a/src/IO/ReadBufferFromAzureBlobStorage.h b/src/IO/ReadBufferFromAzureBlobStorage.h index 78d973747ba..d743e725bda 100644 --- a/src/IO/ReadBufferFromAzureBlobStorage.h +++ b/src/IO/ReadBufferFromAzureBlobStorage.h @@ -1,8 +1,6 @@ #pragma once -#if !defined(ARCADIA_BUILD) #include -#endif #if USE_AZURE_BLOB_STORAGE diff --git a/src/IO/WriteBufferFromAzureBlobStorage.cpp b/src/IO/WriteBufferFromAzureBlobStorage.cpp index 
88882fcef65..0f8cfdf347d 100644 --- a/src/IO/WriteBufferFromAzureBlobStorage.cpp +++ b/src/IO/WriteBufferFromAzureBlobStorage.cpp @@ -1,6 +1,4 @@ -#if !defined(ARCADIA_BUILD) #include -#endif #if USE_AZURE_BLOB_STORAGE diff --git a/src/IO/WriteBufferFromAzureBlobStorage.h b/src/IO/WriteBufferFromAzureBlobStorage.h index cbbfb577a91..75336c497eb 100644 --- a/src/IO/WriteBufferFromAzureBlobStorage.h +++ b/src/IO/WriteBufferFromAzureBlobStorage.h @@ -1,8 +1,6 @@ #pragma once -#if !defined(ARCADIA_BUILD) #include -#endif #if USE_AZURE_BLOB_STORAGE diff --git a/src/Server/CertificateReloader.h b/src/Server/CertificateReloader.h index 88c732c2db6..f984b4e4691 100644 --- a/src/Server/CertificateReloader.h +++ b/src/Server/CertificateReloader.h @@ -1,8 +1,6 @@ #pragma once -#if !defined(ARCADIA_BUILD) -# include -#endif +#include #if USE_SSL From a0fe29e2433147756614133b1805703288aa077c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 15 Apr 2022 23:59:49 +0200 Subject: [PATCH 11/47] Remove Arcadia --- src/Interpreters/ActionsVisitor.cpp | 18 ++++++++++++++++++ src/Interpreters/ActionsVisitor.h | 27 --------------------------- 2 files changed, 18 insertions(+), 27 deletions(-) diff --git a/src/Interpreters/ActionsVisitor.cpp b/src/Interpreters/ActionsVisitor.cpp index 181ac9aed7e..3bad8fba270 100644 --- a/src/Interpreters/ActionsVisitor.cpp +++ b/src/Interpreters/ActionsVisitor.cpp @@ -252,6 +252,17 @@ static Block createBlockFromAST(const ASTPtr & node, const DataTypes & types, Co return header.cloneWithColumns(std::move(columns)); } + +namespace +{ + +/** Create a block for set from expression. + * 'set_element_types' - types of what are on the left hand side of IN. + * 'right_arg' - list of values: 1, 2, 3 or list of tuples: (1, 2), (3, 4), (5, 6). + * + * We need special implementation for ASTFunction, because in case, when we interpret + * large tuple or array as function, `evaluateConstantExpression` works extremely slow. + */ Block createBlockForSet( const DataTypePtr & left_arg_type, const ASTPtr & right_arg, @@ -295,6 +306,10 @@ Block createBlockForSet( return block; } +/** Create a block for set from literal. + * 'set_element_types' - types of what are on the left hand side of IN. + * 'right_arg' - Literal - Tuple or Array. + */ Block createBlockForSet( const DataTypePtr & left_arg_type, const std::shared_ptr & right_arg, @@ -346,6 +361,9 @@ Block createBlockForSet( return createBlockFromAST(elements_ast, set_element_types, context); } +} + + SetPtr makeExplicitSet( const ASTFunction * node, const ActionsDAG & actions, bool create_ordered_set, ContextPtr context, const SizeLimits & size_limits, PreparedSets & prepared_sets) diff --git a/src/Interpreters/ActionsVisitor.h b/src/Interpreters/ActionsVisitor.h index 342cc9eef9d..d1558cb961c 100644 --- a/src/Interpreters/ActionsVisitor.h +++ b/src/Interpreters/ActionsVisitor.h @@ -27,33 +27,6 @@ SetPtr makeExplicitSet( const ASTFunction * node, const ActionsDAG & actions, bool create_ordered_set, ContextPtr context, const SizeLimits & limits, PreparedSets & prepared_sets); -/** Create a block for set from expression. - * 'set_element_types' - types of what are on the left hand side of IN. - * 'right_arg' - list of values: 1, 2, 3 or list of tuples: (1, 2), (3, 4), (5, 6). - * - * We need special implementation for ASTFunction, because in case, when we interpret - * large tuple or array as function, `evaluateConstantExpression` works extremely slow. 
- * - * Note: this and following functions are used in third-party applications in Arcadia, so - * they should be declared in header file. - * - */ -Block createBlockForSet( - const DataTypePtr & left_arg_type, - const std::shared_ptr & right_arg, - const DataTypes & set_element_types, - ContextPtr context); - -/** Create a block for set from literal. - * 'set_element_types' - types of what are on the left hand side of IN. - * 'right_arg' - Literal - Tuple or Array. - */ -Block createBlockForSet( - const DataTypePtr & left_arg_type, - const ASTPtr & right_arg, - const DataTypes & set_element_types, - ContextPtr context); - /** For ActionsVisitor * A stack of ExpressionActions corresponding to nested lambda expressions. * The new action should be added to the highest possible level. From cbeeb7ec4f5d055344973d908c716e5d166ea0b3 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 16 Apr 2022 00:20:47 +0200 Subject: [PATCH 12/47] Remove Arcadia --- src/AggregateFunctions/UniquesHashSet.h | 2 +- src/Common/Config/ConfigProcessor.h | 3 +- src/Common/HashTable/Hash.h | 2 +- src/Common/OptimizedRegularExpression.cpp | 2 +- src/Common/StringUtils/CMakeLists.txt | 2 +- src/Common/ZooKeeper/ZooKeeperCommon.h | 2 +- src/Common/ZooKeeper/examples/CMakeLists.txt | 3 - .../examples/zk_many_watches_reconnect.cpp | 66 --------- src/Common/mysqlxx/mysqlxx/mysqlxx.h | 6 +- src/Common/parseAddress.h | 4 +- src/Functions/DateTimeTransforms.h | 2 +- src/Functions/FunctionFactory.h | 2 +- src/Functions/FunctionMathUnary.h | 2 +- src/Functions/FunctionsEmbeddedDictionaries.h | 2 +- src/Functions/FunctionsExternalDictionaries.h | 2 +- src/Functions/FunctionsVisitParam.h | 2 +- src/Functions/URL/FunctionsURL.h | 2 +- src/Functions/array/arrayEnumerateRanked.h | 2 +- src/Functions/array/arrayIndex.h | 4 +- src/Functions/extractTextFromHTML.cpp | 2 +- src/Functions/timeSlots.cpp | 2 +- src/IO/ReadHelpers.cpp | 2 +- src/IO/tests/gtest_s3_uri.cpp | 34 ++--- ...OptimizeIfWithConstantConditionVisitor.cpp | 2 +- src/Parsers/ASTFunction.cpp | 2 +- src/Parsers/ExpressionElementParsers.cpp | 2 +- .../Impl/JSONAsStringRowInputFormat.cpp | 1 + .../Algorithms/tests/gtest_graphite.cpp | 8 +- .../Transforms/ExpressionTransform.h | 2 +- src/Storages/examples/CMakeLists.txt | 13 -- .../examples/remove_symlink_directory.cpp | 35 ----- .../examples/transform_part_zk_nodes.cpp | 131 ------------------ 32 files changed, 49 insertions(+), 299 deletions(-) delete mode 100644 src/Common/ZooKeeper/examples/zk_many_watches_reconnect.cpp delete mode 100644 src/Storages/examples/remove_symlink_directory.cpp delete mode 100644 src/Storages/examples/transform_part_zk_nodes.cpp diff --git a/src/AggregateFunctions/UniquesHashSet.h b/src/AggregateFunctions/UniquesHashSet.h index 6837803c67d..8648f6e2500 100644 --- a/src/AggregateFunctions/UniquesHashSet.h +++ b/src/AggregateFunctions/UniquesHashSet.h @@ -59,7 +59,7 @@ /** This hash function is not the most optimal, but UniquesHashSet states counted with it, - * stored in many places on disks (in the Yandex.Metrika), so it continues to be used. + * stored in many places on disks (in many companies), so it continues to be used. 
*/ struct UniquesHashSetDefaultHash { diff --git a/src/Common/Config/ConfigProcessor.h b/src/Common/Config/ConfigProcessor.h index 6c642690945..0e1d0facf48 100644 --- a/src/Common/Config/ConfigProcessor.h +++ b/src/Common/Config/ConfigProcessor.h @@ -55,7 +55,7 @@ public: /// 2) Determine the includes file from the config: /path2/metrika.xml /// If this path is not configured, use /etc/metrika.xml /// 3) Replace elements matching the "" pattern with - /// "contents of the yandex/bar element in metrika.xml" + /// "contents of the clickhouse/bar element in metrika.xml" /// 4) If zk_node_cache is non-NULL, replace elements matching the "" pattern with /// "contents of the /bar ZooKeeper node". /// If has_zk_includes is non-NULL and there are such elements, set has_zk_includes to true. @@ -137,4 +137,3 @@ private: }; } - diff --git a/src/Common/HashTable/Hash.h b/src/Common/HashTable/Hash.h index 5dbeeecf96b..3cf8978f418 100644 --- a/src/Common/HashTable/Hash.h +++ b/src/Common/HashTable/Hash.h @@ -13,7 +13,7 @@ * * Example: when we do aggregation by the visitor ID, the performance increase is more than 5 times. * This is because of following reasons: - * - in Yandex, visitor identifier is an integer that has timestamp with seconds resolution in lower bits; + * - in Metrica web analytics system, visitor identifier is an integer that has timestamp with seconds resolution in lower bits; * - in typical implementation of standard library, hash function for integers is trivial and just use lower bits; * - traffic is non-uniformly distributed across a day; * - we are using open-addressing linear probing hash tables that are most critical to hash function quality, diff --git a/src/Common/OptimizedRegularExpression.cpp b/src/Common/OptimizedRegularExpression.cpp index 1464923e6ab..da348adbe31 100644 --- a/src/Common/OptimizedRegularExpression.cpp +++ b/src/Common/OptimizedRegularExpression.cpp @@ -28,7 +28,7 @@ void OptimizedRegularExpressionImpl::analyze( * in which all metacharacters are escaped, * and also if there are no '|' outside the brackets, * and also avoid substrings of the form `http://` or `www` and some other - * (this is the hack for typical use case in Yandex.Metrica). + * (this is the hack for typical use case in web analytics applications). */ const char * begin = regexp.data(); const char * pos = begin; diff --git a/src/Common/StringUtils/CMakeLists.txt b/src/Common/StringUtils/CMakeLists.txt index 4eedbf4842b..57c196d335c 100644 --- a/src/Common/StringUtils/CMakeLists.txt +++ b/src/Common/StringUtils/CMakeLists.txt @@ -1,4 +1,4 @@ -# These files are located in separate library, because they are used by Yandex.Metrika code +# These files are located in separate library, because they are used by separate products # in places when no dependency on whole "dbms" library is possible. include("${ClickHouse_SOURCE_DIR}/cmake/dbms_glob_sources.cmake") diff --git a/src/Common/ZooKeeper/ZooKeeperCommon.h b/src/Common/ZooKeeper/ZooKeeperCommon.h index e51bea3f7f8..532488c08f8 100644 --- a/src/Common/ZooKeeper/ZooKeeperCommon.h +++ b/src/Common/ZooKeeper/ZooKeeperCommon.h @@ -45,7 +45,7 @@ struct ZooKeeperResponse : virtual Response using ZooKeeperResponsePtr = std::shared_ptr; -/// Exposed in header file for Yandex.Metrica code. +/// Exposed in header file for some external code. 
struct ZooKeeperRequest : virtual Request { XID xid = 0; diff --git a/src/Common/ZooKeeper/examples/CMakeLists.txt b/src/Common/ZooKeeper/examples/CMakeLists.txt index 8bec951e24f..b449b172605 100644 --- a/src/Common/ZooKeeper/examples/CMakeLists.txt +++ b/src/Common/ZooKeeper/examples/CMakeLists.txt @@ -7,8 +7,5 @@ target_link_libraries(zkutil_test_commands_new_lib PRIVATE clickhouse_common_zoo add_executable(zkutil_test_async zkutil_test_async.cpp) target_link_libraries(zkutil_test_async PRIVATE clickhouse_common_zookeeper_no_log) -add_executable (zk_many_watches_reconnect zk_many_watches_reconnect.cpp) -target_link_libraries (zk_many_watches_reconnect PRIVATE clickhouse_common_zookeeper_no_log clickhouse_common_config) - add_executable (zookeeper_impl zookeeper_impl.cpp) target_link_libraries (zookeeper_impl PRIVATE clickhouse_common_zookeeper_no_log) diff --git a/src/Common/ZooKeeper/examples/zk_many_watches_reconnect.cpp b/src/Common/ZooKeeper/examples/zk_many_watches_reconnect.cpp deleted file mode 100644 index cf819121234..00000000000 --- a/src/Common/ZooKeeper/examples/zk_many_watches_reconnect.cpp +++ /dev/null @@ -1,66 +0,0 @@ -#include -#include -#include -#include - -/// A tool for reproducing https://issues.apache.org/jira/browse/ZOOKEEPER-706 -/// Original libzookeeper can't reconnect the session if the length of SET_WATCHES message -/// exceeds jute.maxbuffer (0xfffff by default). -/// This happens when the number of watches exceeds ~29000. -/// -/// Session reconnect can be caused by forbidding packets to the current zookeeper server, e.g. -/// sudo ip6tables -A OUTPUT -d mtzoo01it.haze.yandex.net -j REJECT - -const size_t N_THREADS = 100; - -int main(int argc, char ** argv) -{ - try - { - if (argc != 3) - { - std::cerr << "usage: " << argv[0] << " " << std::endl; - return 3; - } - - DB::ConfigProcessor processor(argv[1], false, true); - auto config = processor.loadConfig().configuration; - zkutil::ZooKeeper zk(*config, "zookeeper", nullptr); - zkutil::EventPtr watch = std::make_shared(); - - /// NOTE: setting watches in multiple threads because doing it in a single thread is too slow. - size_t watches_per_thread = std::stoull(argv[2]) / N_THREADS; - std::vector threads; - for (size_t i_thread = 0; i_thread < N_THREADS; ++i_thread) - { - threads.emplace_back([&, i_thread] - { - for (size_t i = 0; i < watches_per_thread; ++i) - zk.exists("/clickhouse/nonexistent_node" + std::to_string(i * N_THREADS + i_thread), nullptr, watch); - }); - } - for (size_t i_thread = 0; i_thread < N_THREADS; ++i_thread) - threads[i_thread].join(); - - while (true) - { - std::cerr << "WAITING..." << std::endl; - sleep(10); - } - } - catch (Poco::Exception & e) - { - std::cerr << "Exception: " << e.displayText() << std::endl; - return 1; - } - catch (std::exception & e) - { - std::cerr << "std::exception: " << e.what() << std::endl; - return 3; - } - catch (...) - { - std::cerr << "Some exception" << std::endl; - return 2; - } -} diff --git a/src/Common/mysqlxx/mysqlxx/mysqlxx.h b/src/Common/mysqlxx/mysqlxx/mysqlxx.h index 785d4361fd7..0b6cf5e7c1a 100644 --- a/src/Common/mysqlxx/mysqlxx/mysqlxx.h +++ b/src/Common/mysqlxx/mysqlxx/mysqlxx.h @@ -10,10 +10,10 @@ /** 'mysqlxx' - very simple library for replacement of 'mysql++' library. * - * For whatever reason, in Yandex.Metrica, back in 2008, 'mysql++' library was used. + * For whatever reason, in Metrica web analytics system, back in 2008, 'mysql++' library was used. * There are the following shortcomings of 'mysql++': * 1. 
Too rich functionality: most of it is not used. - * 2. Low performance (when used for Yandex.Metrica). + * 2. Low performance (when used for Metrica). * * Low performance is caused by the following reasons: * @@ -50,7 +50,7 @@ * And for the sake of simplicity, some functions work only with certain assumptions, * or with slightly different semantic than in mysql++. * And we don't care about cross-platform usage of mysqlxx. - * These assumptions are specific for Yandex.Metrica. Your mileage may vary. + * These assumptions are specific for Metrica. Your mileage may vary. * * mysqlxx could not be considered as separate full-featured library, * because it is developed from the principle - "everything that we don't need is not implemented". diff --git a/src/Common/parseAddress.h b/src/Common/parseAddress.h index 86340982c67..297934a6379 100644 --- a/src/Common/parseAddress.h +++ b/src/Common/parseAddress.h @@ -13,8 +13,8 @@ namespace DB * Otherwise, an exception is thrown. * * Examples: - * yandex.ru - returns "yandex.ru" and default_port - * yandex.ru:80 - returns "yandex.ru" and 80 + * clickhouse.com - returns "clickhouse.com" and default_port + * clickhouse.com:80 - returns "clickhouse.com" and 80 * [2a02:6b8:a::a]:80 - returns [2a02:6b8:a::a] and 80; note that square brackets remain in returned host. */ std::pair parseAddress(const std::string & str, UInt16 default_port); diff --git a/src/Functions/DateTimeTransforms.h b/src/Functions/DateTimeTransforms.h index 3a7baf45adc..5d1bcaf48cf 100644 --- a/src/Functions/DateTimeTransforms.h +++ b/src/Functions/DateTimeTransforms.h @@ -539,7 +539,7 @@ struct ToStartOfFifteenMinutesImpl using FactorTransform = ZeroTransform; }; -/// Round to start of half-an-hour length interval with unspecified offset. This transform is specific for Yandex.Metrica. +/// Round to start of half-an-hour length interval with unspecified offset. This transform is specific for Metrica web analytics system. struct TimeSlotImpl { static constexpr auto name = "timeSlot"; diff --git a/src/Functions/FunctionFactory.h b/src/Functions/FunctionFactory.h index 898a463cd58..13b14559ec4 100644 --- a/src/Functions/FunctionFactory.h +++ b/src/Functions/FunctionFactory.h @@ -40,7 +40,7 @@ public: registerFunction(name, &Function::create, case_sensitiveness); } - /// This function is used by YQL - internal Yandex product that depends on ClickHouse by source code. + /// This function is used by YQL - innovative transactional DBMS that depends on ClickHouse by source code. std::vector getAllNames() const; bool has(const std::string & name) const; diff --git a/src/Functions/FunctionMathUnary.h b/src/Functions/FunctionMathUnary.h index fa10c004e87..bd656db792b 100644 --- a/src/Functions/FunctionMathUnary.h +++ b/src/Functions/FunctionMathUnary.h @@ -10,7 +10,7 @@ #include "config_functions.h" -/** FastOps is a fast vector math library from Mikhail Parakhin (former Yandex CTO), +/** FastOps is a fast vector math library from Mikhail Parakhin, https://www.linkedin.com/in/mikhail-parakhin/ * Enabled by default. 
*/ #if USE_FASTOPS diff --git a/src/Functions/FunctionsEmbeddedDictionaries.h b/src/Functions/FunctionsEmbeddedDictionaries.h index c6ea886b4a8..20be3ee3cce 100644 --- a/src/Functions/FunctionsEmbeddedDictionaries.h +++ b/src/Functions/FunctionsEmbeddedDictionaries.h @@ -33,7 +33,7 @@ namespace ErrorCodes extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; } -/** Functions using Yandex.Metrica dictionaries +/** Functions using deprecated dictionaries * - dictionaries of regions, operating systems, search engines. * * Climb up the tree to a certain level. diff --git a/src/Functions/FunctionsExternalDictionaries.h b/src/Functions/FunctionsExternalDictionaries.h index 6a701d7b864..189ec7321c1 100644 --- a/src/Functions/FunctionsExternalDictionaries.h +++ b/src/Functions/FunctionsExternalDictionaries.h @@ -195,7 +195,7 @@ public: * that is the initiator of a distributed query, * in the case when the function will be invoked for real data only at the remote servers. * This feature is controversial and implemented specially - * for backward compatibility with the case in Yandex Banner System. + * for backward compatibility with the case in the Banner System application. */ if (input_rows_count == 0) return result_type->createColumn(); diff --git a/src/Functions/FunctionsVisitParam.h b/src/Functions/FunctionsVisitParam.h index 362c3bcd693..09fcf8659ed 100644 --- a/src/Functions/FunctionsVisitParam.h +++ b/src/Functions/FunctionsVisitParam.h @@ -12,7 +12,7 @@ /** Functions for retrieving "visit parameters". - * Visit parameters in Yandex.Metrika are a special kind of JSONs. + * Visit parameters in Metrica web analytics system are a special kind of JSONs. * These functions are applicable to almost any JSONs. * Implemented via templates from FunctionsStringSearch.h. * diff --git a/src/Functions/URL/FunctionsURL.h b/src/Functions/URL/FunctionsURL.h index 297b62ca256..a0f106742fb 100644 --- a/src/Functions/URL/FunctionsURL.h +++ b/src/Functions/URL/FunctionsURL.h @@ -45,7 +45,7 @@ namespace DB * Remove specified parameter from URL. * cutURLParameter(URL, name) * - * Get array of URL 'hierarchy' as in Yandex.Metrica tree-like reports. See docs. + * Get array of URL 'hierarchy' as in web-analytics tree-like reports. See the docs. * URLHierarchy(URL) */ diff --git a/src/Functions/array/arrayEnumerateRanked.h b/src/Functions/array/arrayEnumerateRanked.h index d6a62a966ae..ac3936af0fd 100644 --- a/src/Functions/array/arrayEnumerateRanked.h +++ b/src/Functions/array/arrayEnumerateRanked.h @@ -15,7 +15,7 @@ // for better debug: #include /** The function will enumerate distinct values of the passed multidimensional arrays looking inside at the specified depths. - * This is very unusual function made as a special order for Yandex.Metrica. + * This is very unusual function made as a special order for our dear customer - Metrica web analytics system. * * arrayEnumerateUniqRanked(['hello', 'world', 'hello']) = [1, 1, 2] * - it returns similar structured array containing number of occurrence of the corresponding value. diff --git a/src/Functions/array/arrayIndex.h b/src/Functions/array/arrayIndex.h index 8b42b99cd69..0dbbe5e41b6 100644 --- a/src/Functions/array/arrayIndex.h +++ b/src/Functions/array/arrayIndex.h @@ -718,9 +718,7 @@ private: /** * Catches arguments of type LowCardinality(T) (left) and U (right). 
* - * The perftests - * https://clickhouse-test-reports.s3.yandex.net/12550/2d27fa0fa8c198a82bf1fe3625050ccf56695976/integration_tests_(release).html - * showed that the amount of action needed to convert the non-constant right argument to the index column + * The perftests showed that the amount of action needed to convert the non-constant right argument to the index column * (similar to the left one's) is significantly higher than converting the array itself to an ordinary column. * * So, in terms of performance it's more optimal to fall back to default implementation and catch only constant diff --git a/src/Functions/extractTextFromHTML.cpp b/src/Functions/extractTextFromHTML.cpp index b38ea74d6ce..f321a59f734 100644 --- a/src/Functions/extractTextFromHTML.cpp +++ b/src/Functions/extractTextFromHTML.cpp @@ -53,7 +53,7 @@ * * Usage example: * - * SELECT extractTextFromHTML(html) FROM url('https://yandex.ru/', RawBLOB, 'html String') + * SELECT extractTextFromHTML(html) FROM url('https://github.com/ClickHouse/ClickHouse', RawBLOB, 'html String') * * - ClickHouse has embedded web browser. */ diff --git a/src/Functions/timeSlots.cpp b/src/Functions/timeSlots.cpp index 34af410befa..a19ccf62565 100644 --- a/src/Functions/timeSlots.cpp +++ b/src/Functions/timeSlots.cpp @@ -29,7 +29,7 @@ namespace * For example, timeSlots(toDateTime('2012-01-01 12:20:00'), 600) = [toDateTime('2012-01-01 12:00:00'), toDateTime('2012-01-01 12:30:00')]. * This is necessary to search for hits that are part of the corresponding visit. * - * This is obsolete function. It was developed for Yandex.Metrica, but no longer used in Yandex. + * This is obsolete function. It was developed for Metrica web analytics system, but the art of its usage has been forgotten. * But this function was adopted by wider audience. 
*/ diff --git a/src/IO/ReadHelpers.cpp b/src/IO/ReadHelpers.cpp index bf3cccccab8..8aee5713cdc 100644 --- a/src/IO/ReadHelpers.cpp +++ b/src/IO/ReadHelpers.cpp @@ -343,7 +343,7 @@ static void parseComplexEscapeSequence(Vector & s, ReadBuffer & buf) && decoded_char != '"' && decoded_char != '`' /// MySQL style identifiers && decoded_char != '/' /// JavaScript in HTML - && decoded_char != '=' /// Yandex's TSKV + && decoded_char != '=' /// TSKV format invented somewhere && !isControlASCII(decoded_char)) { s.push_back('\\'); diff --git a/src/IO/tests/gtest_s3_uri.cpp b/src/IO/tests/gtest_s3_uri.cpp index 7ee72069e57..20d19437c64 100644 --- a/src/IO/tests/gtest_s3_uri.cpp +++ b/src/IO/tests/gtest_s3_uri.cpp @@ -16,36 +16,36 @@ class S3UriTest : public testing::TestWithParam TEST(S3UriTest, validPatterns) { { - S3::URI uri(Poco::URI("https://jokserfn.s3.yandexcloud.net/")); - ASSERT_EQ("https://s3.yandexcloud.net", uri.endpoint); + S3::URI uri(Poco::URI("https://jokserfn.s3.amazonaws.com/")); + ASSERT_EQ("https://s3.amazonaws.com", uri.endpoint); ASSERT_EQ("jokserfn", uri.bucket); ASSERT_EQ("", uri.key); ASSERT_EQ(true, uri.is_virtual_hosted_style); } { - S3::URI uri(Poco::URI("https://s3.yandexcloud.net/jokserfn/")); - ASSERT_EQ("https://s3.yandexcloud.net", uri.endpoint); + S3::URI uri(Poco::URI("https://s3.amazonaws.com/jokserfn/")); + ASSERT_EQ("https://s3.amazonaws.com", uri.endpoint); ASSERT_EQ("jokserfn", uri.bucket); ASSERT_EQ("", uri.key); ASSERT_EQ(false, uri.is_virtual_hosted_style); } { - S3::URI uri(Poco::URI("https://yandexcloud.net/bucket/")); - ASSERT_EQ("https://yandexcloud.net", uri.endpoint); + S3::URI uri(Poco::URI("https://amazonaws.com/bucket/")); + ASSERT_EQ("https://amazonaws.com", uri.endpoint); ASSERT_EQ("bucket", uri.bucket); ASSERT_EQ("", uri.key); ASSERT_EQ(false, uri.is_virtual_hosted_style); } { - S3::URI uri(Poco::URI("https://jokserfn.s3.yandexcloud.net/data")); - ASSERT_EQ("https://s3.yandexcloud.net", uri.endpoint); + S3::URI uri(Poco::URI("https://jokserfn.s3.amazonaws.com/data")); + ASSERT_EQ("https://s3.amazonaws.com", uri.endpoint); ASSERT_EQ("jokserfn", uri.bucket); ASSERT_EQ("data", uri.key); ASSERT_EQ(true, uri.is_virtual_hosted_style); } { - S3::URI uri(Poco::URI("https://storage.yandexcloud.net/jokserfn/data")); - ASSERT_EQ("https://storage.yandexcloud.net", uri.endpoint); + S3::URI uri(Poco::URI("https://storage.amazonaws.com/jokserfn/data")); + ASSERT_EQ("https://storage.amazonaws.com", uri.endpoint); ASSERT_EQ("jokserfn", uri.bucket); ASSERT_EQ("data", uri.key); ASSERT_EQ(false, uri.is_virtual_hosted_style); @@ -97,13 +97,13 @@ INSTANTIATE_TEST_SUITE_P( S3UriTest, testing::Values( "https:///", - "https://.s3.yandexcloud.net/key", - "https://s3.yandexcloud.net/key", - "https://jokserfn.s3yandexcloud.net/key", - "https://s3.yandexcloud.net//", - "https://yandexcloud.net/", - "https://yandexcloud.net//", - "https://yandexcloud.net//key")); + "https://.s3.amazonaws.com/key", + "https://s3.amazonaws.com/key", + "https://jokserfn.s3amazonaws.com/key", + "https://s3.amazonaws.com//", + "https://amazonaws.com/", + "https://amazonaws.com//", + "https://amazonaws.com//key")); } diff --git a/src/Interpreters/OptimizeIfWithConstantConditionVisitor.cpp b/src/Interpreters/OptimizeIfWithConstantConditionVisitor.cpp index 802bf4e43ce..28b19f6670d 100644 --- a/src/Interpreters/OptimizeIfWithConstantConditionVisitor.cpp +++ b/src/Interpreters/OptimizeIfWithConstantConditionVisitor.cpp @@ -28,7 +28,7 @@ static bool tryExtractConstValueFromCondition(const ASTPtr & 
condition, bool & v } /// cast of numeric constant in condition to UInt8 - /// Note: this solution is ad-hoc and only implemented for yandex.metrica use case. + /// Note: this solution is ad-hoc and only implemented for the Metrica use case (one of the best customers). /// We should allow any constant condition (or maybe remove this optimization completely) later. if (const auto * function = condition->as<ASTFunction>()) { diff --git a/src/Parsers/ASTFunction.cpp b/src/Parsers/ASTFunction.cpp index f9a5c7be75f..b86929b054c 100644 --- a/src/Parsers/ASTFunction.cpp +++ b/src/Parsers/ASTFunction.cpp @@ -166,7 +166,7 @@ ASTPtr ASTFunction::toLiteral() const * Motivation: most people are unaware that _ is a metacharacter and forget to properly escape it with two backslashes. * With highlighting we make it clearly obvious. * - * Another case is regexp match. Suppose the user types match(URL, 'www.yandex.ru'). It often means that the user is unaware that . is a metacharacter. + * Another case is regexp match. Suppose the user types match(URL, 'www.clickhouse.com'). It often means that the user is unaware that . is a metacharacter. */ static bool highlightStringLiteralWithMetacharacters(const ASTPtr & node, const IAST::FormatSettings & settings, const char * metacharacters) { diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp index cd07e304a39..29c7846283e 100644 --- a/src/Parsers/ExpressionElementParsers.cpp +++ b/src/Parsers/ExpressionElementParsers.cpp @@ -1727,7 +1727,7 @@ const char * ParserAlias::restricted_keywords[] = "NOT", "OFFSET", "ON", - "ONLY", /// YQL synonym for ANTI. Note: YQL is the name of one of Yandex proprietary languages, completely unrelated to ClickHouse. + "ONLY", /// YQL's synonym for ANTI. Note: YQL is the name of a proprietary language, completely unrelated to ClickHouse. "ORDER", "PREWHERE", "RIGHT", diff --git a/src/Processors/Formats/Impl/JSONAsStringRowInputFormat.cpp b/src/Processors/Formats/Impl/JSONAsStringRowInputFormat.cpp index 9bf1682b77e..e31006ff0f6 100644 --- a/src/Processors/Formats/Impl/JSONAsStringRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONAsStringRowInputFormat.cpp @@ -52,6 +52,7 @@ void JSONAsRowInputFormat::readSuffix() { assertChar(']', *buf); skipWhitespaceIfAny(*buf); + data_in_square_brackets = false; } if (!buf->eof() && *buf->position() == ';') { diff --git a/src/Processors/Merges/Algorithms/tests/gtest_graphite.cpp b/src/Processors/Merges/Algorithms/tests/gtest_graphite.cpp index 1d739bf566a..9160ad6e0fa 100644 --- a/src/Processors/Merges/Algorithms/tests/gtest_graphite.cpp +++ b/src/Processors/Merges/Algorithms/tests/gtest_graphite.cpp @@ -149,7 +149,7 @@ TEST(GraphiteTest, testSelectPattern) using namespace std::literals; std::string - xml(R"END(<yandex> + xml(R"END(<clickhouse> \.sum$ @@ -210,7 +210,7 @@ TEST(GraphiteTest, testSelectPattern) - </yandex> + </clickhouse> )END"); // Retentions must be ordered by 'age' descending. @@ -370,7 +370,7 @@ TEST(GraphiteTest, testSelectPatternTyped) using namespace std::literals; std::string - xml(R"END(<yandex> + xml(R"END(<clickhouse> plain @@ -488,7 +488,7 @@ TEST(GraphiteTest, testSelectPatternTyped) - </yandex> + </clickhouse> )END"); // Retentions must be ordered by 'age' descending. diff --git a/src/Processors/Transforms/ExpressionTransform.h b/src/Processors/Transforms/ExpressionTransform.h index ea73c8fb1da..791c7d7ba73 100644 --- a/src/Processors/Transforms/ExpressionTransform.h +++ b/src/Processors/Transforms/ExpressionTransform.h @@ -12,7 +12,7 @@ class ActionsDAG; /** Executes a certain expression over the block.
* The expression consists of column identifiers from the block, constants, common functions. - * For example: hits * 2 + 3, url LIKE '%yandex%' + * For example: hits * 2 + 3, url LIKE '%clickhouse%' * The expression processes each row independently of the others. */ class ExpressionTransform final : public ISimpleTransform diff --git a/src/Storages/examples/CMakeLists.txt b/src/Storages/examples/CMakeLists.txt index 103972a106f..ca85ca9d98f 100644 --- a/src/Storages/examples/CMakeLists.txt +++ b/src/Storages/examples/CMakeLists.txt @@ -1,6 +1,3 @@ -add_executable (remove_symlink_directory remove_symlink_directory.cpp) -target_link_libraries (remove_symlink_directory PRIVATE dbms) - add_executable (merge_selector merge_selector.cpp) target_link_libraries (merge_selector PRIVATE dbms) @@ -12,13 +9,3 @@ target_link_libraries (get_current_inserts_in_replicated PRIVATE dbms clickhouse add_executable (get_abandonable_lock_in_all_partitions get_abandonable_lock_in_all_partitions.cpp) target_link_libraries (get_abandonable_lock_in_all_partitions PRIVATE dbms clickhouse_common_config clickhouse_common_zookeeper) - -add_executable (transform_part_zk_nodes transform_part_zk_nodes.cpp) -target_link_libraries (transform_part_zk_nodes - PRIVATE - boost::program_options - clickhouse_common_config - clickhouse_common_zookeeper - dbms - string_utils -) diff --git a/src/Storages/examples/remove_symlink_directory.cpp b/src/Storages/examples/remove_symlink_directory.cpp deleted file mode 100644 index db436c0a608..00000000000 --- a/src/Storages/examples/remove_symlink_directory.cpp +++ /dev/null @@ -1,35 +0,0 @@ -#include -#include -#include -#include -#include - -namespace fs = std::filesystem; - -namespace DB -{ - namespace ErrorCodes - { - extern const int SYSTEM_ERROR; - } -} - -int main(int, char **) -try -{ - fs::path dir("./test_dir/"); - fs::create_directories(dir); - FS::createFile("./test_dir/file"); - - if (0 != symlink("./test_dir", "./test_link")) - DB::throwFromErrnoWithPath("Cannot create symlink", "./test_link", DB::ErrorCodes::SYSTEM_ERROR); - - fs::rename("./test_link", "./test_link2"); - fs::remove_all("./test_link2"); - return 0; -} -catch (...) -{ - std::cerr << DB::getCurrentExceptionMessage(false) << "\n"; - return 1; -} diff --git a/src/Storages/examples/transform_part_zk_nodes.cpp b/src/Storages/examples/transform_part_zk_nodes.cpp deleted file mode 100644 index 3cbcc76190e..00000000000 --- a/src/Storages/examples/transform_part_zk_nodes.cpp +++ /dev/null @@ -1,131 +0,0 @@ -#include -#include -#include -#include -#include - -#include - -#include -#include - - -int main(int argc, char ** argv) -try -{ - boost::program_options::options_description desc("Allowed options"); - desc.add_options() - ("help,h", "produce help message") - ("address,a", boost::program_options::value()->required(), - "addresses of ZooKeeper instances, comma separated. Example: example01e.yandex.ru:2181") - ("path,p", boost::program_options::value()->required(), - "where to start") - ; - - boost::program_options::variables_map options; - boost::program_options::store(boost::program_options::parse_command_line(argc, argv, desc), options); - - if (options.count("help")) - { - std::cout << "Transform contents of part nodes in ZooKeeper to more compact storage scheme." 
<< std::endl; - std::cout << "Usage: " << argv[0] << " [options]" << std::endl; - std::cout << desc << std::endl; - return 1; - } - - zkutil::ZooKeeper zookeeper(options.at("address").as()); - - std::string initial_path = options.at("path").as(); - - struct Node - { - Node( - std::string path_, - std::future get_future_, - std::future children_future_, - Node * parent_) - : path(std::move(path_)) - , get_future(std::move(get_future_)) - , children_future(std::move(children_future_)) - , parent(parent_) - { - } - - std::string path; - std::future get_future; - std::future children_future; - - Node * parent = nullptr; - std::future set_future; - }; - - std::list nodes_queue; - nodes_queue.emplace_back( - initial_path, zookeeper.asyncGet(initial_path), zookeeper.asyncGetChildren(initial_path), nullptr); - - for (auto it = nodes_queue.begin(); it != nodes_queue.end(); ++it) - { - Coordination::GetResponse get_response; - Coordination::ListResponse children_response; - try - { - get_response = it->get_future.get(); - children_response = it->children_future.get(); - } - catch (const Coordination::Exception & e) - { - if (e.code == Coordination::Error::ZNONODE) - continue; - throw; - } - - if (get_response.stat.ephemeralOwner) - continue; - - if (it->path.find("/parts/") != std::string::npos - && !endsWith(it->path, "/columns") - && !endsWith(it->path, "/checksums")) - { - /// The node is related to part. - - /// If it is the part in old format (the node contains children) - convert it to the new format. - if (!children_response.names.empty()) - { - auto part_header = DB::ReplicatedMergeTreePartHeader::fromColumnsAndChecksumsZNodes( - zookeeper.get(it->path + "/columns"), zookeeper.get(it->path + "/checksums")); - - Coordination::Requests ops; - ops.emplace_back(zkutil::makeRemoveRequest(it->path + "/columns", -1)); - ops.emplace_back(zkutil::makeRemoveRequest(it->path + "/checksums", -1)); - ops.emplace_back(zkutil::makeSetRequest(it->path, part_header.toString(), -1)); - - it->set_future = zookeeper.asyncMulti(ops); - } - } - else - { - /// Recursively add children to the queue. - for (const auto & name : children_response.names) - { - std::string child_path = it->path == "/" ? it->path + name : it->path + '/' + name; - nodes_queue.emplace_back( - child_path, zookeeper.asyncGet(child_path), zookeeper.asyncGetChildren(child_path), - &(*it)); - } - } - } - - for (auto & node : nodes_queue) - { - if (node.set_future.valid()) - { - node.set_future.get(); - std::cerr << node.path << " changed!" << std::endl; - } - } -} -catch (...) 
-{ - std::cerr << DB::getCurrentExceptionMessage(true) << '\n'; - throw; -} From e128d89957816877944e37531a4e9acc0ed477b5 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 16 Apr 2022 00:24:58 +0200 Subject: [PATCH 13/47] Remove Arcadia --- src/Disks/DiskWebServer.h | 2 +- .../clickhouse-diagnostics | 2 +- utils/graphite-rollup/rollup-tag-list.xml | 4 +- utils/graphite-rollup/rollup-typed.xml | 4 +- utils/graphite-rollup/rollup.xml | 4 +- utils/release/push_packages | 250 ------------------ utils/s3tools/s3uploader | 4 +- .../main.cpp | 2 +- .../main.cpp | 2 +- utils/zookeeper-dump-tree/main.cpp | 2 +- utils/zookeeper-remove-by-list/main.cpp | 2 +- 11 files changed, 14 insertions(+), 264 deletions(-) delete mode 100755 utils/release/push_packages diff --git a/src/Disks/DiskWebServer.h b/src/Disks/DiskWebServer.h index 6341b582174..98f92fe5986 100644 --- a/src/Disks/DiskWebServer.h +++ b/src/Disks/DiskWebServer.h @@ -20,7 +20,7 @@ namespace ErrorCodes * * * web - * https://clickhouse-datasets.s3.yandex.net/disk-with-static-files-tests/test-hits/ + * https://clickhouse-datasets.s3.amazonaws.com/disk-with-static-files-tests/test-hits/ * * * diff --git a/utils/clickhouse-diagnostics/clickhouse-diagnostics b/utils/clickhouse-diagnostics/clickhouse-diagnostics index 2fe67071c3c..cf65e4efbfb 100755 --- a/utils/clickhouse-diagnostics/clickhouse-diagnostics +++ b/utils/clickhouse-diagnostics/clickhouse-diagnostics @@ -665,7 +665,7 @@ class DiagnosticsData: def _dump_wiki(self): """ - Dump diagnostic data in Yandex wiki format. + Dump diagnostic data in Wiki format. """ def _write_title(buffer, value): diff --git a/utils/graphite-rollup/rollup-tag-list.xml b/utils/graphite-rollup/rollup-tag-list.xml index ef28f2089ad..edab2f16436 100644 --- a/utils/graphite-rollup/rollup-tag-list.xml +++ b/utils/graphite-rollup/rollup-tag-list.xml @@ -1,4 +1,4 @@ - + plain @@ -164,4 +164,4 @@ - + diff --git a/utils/graphite-rollup/rollup-typed.xml b/utils/graphite-rollup/rollup-typed.xml index 0b27d43ece9..ace439dba4a 100644 --- a/utils/graphite-rollup/rollup-typed.xml +++ b/utils/graphite-rollup/rollup-typed.xml @@ -1,4 +1,4 @@ - + plain @@ -164,4 +164,4 @@ - + diff --git a/utils/graphite-rollup/rollup.xml b/utils/graphite-rollup/rollup.xml index 641b0130509..2089605c8bf 100644 --- a/utils/graphite-rollup/rollup.xml +++ b/utils/graphite-rollup/rollup.xml @@ -1,4 +1,4 @@ - + \.sum$ @@ -144,4 +144,4 @@ - + diff --git a/utils/release/push_packages b/utils/release/push_packages deleted file mode 100755 index e25cb325c71..00000000000 --- a/utils/release/push_packages +++ /dev/null @@ -1,250 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -import argparse -import subprocess -import os -import logging -import shutil -import base64 -import pexpect - - -# Do nothing if keys are not provided -class GpgKey(object): - gnupg_dir = os.path.expanduser('~/.gnupg') - TEMPGNUPG_DIR = os.path.expanduser('~/.local/tempgnupg') - - def __init__(self, secret_key_path, public_key_path): - if secret_key_path and public_key_path: - with open(secret_key_path, 'r') as sec, open(public_key_path, 'r') as pub: - self._secret_key = sec.read() - self._public_key = pub.read() - else: - self._secret_key = None - self._public_key = None - - def __enter__(self): - if self._secret_key and self._public_key: - if os.path.exists(self.gnupg_dir): - shutil.move(self.gnupg_dir, self.TEMPGNUPG_DIR) - os.mkdir(self.gnupg_dir) - open(os.path.join(self.gnupg_dir, 'secring.gpg'), 'wb').write(base64.b64decode(self._secret_key)) - 
open(os.path.join(self.gnupg_dir, 'pubring.gpg'), 'wb').write(base64.b64decode(self._public_key)) - return self - - def __exit__(self, exc_type, exc_val, exc_tb): - if self._secret_key and self._public_key: - shutil.rmtree(self.gnupg_dir) - if os.path.exists(self.TEMPGNUPG_DIR): - shutil.move(self.TEMPGNUPG_DIR, self.gnupg_dir) - - -class DebRelease(object): - - DUPLOAD_CONF_TEMPLATE = '\n\t'.join(( - "$cfg{{'{title}'}} = {{", - 'fqdn => "{fqdn}",', - 'method => "{method}",', - 'login => "{login}",', - 'incoming => "{incoming}",', - 'options => "{options}",', - 'dinstall_runs => {dinstall_runs},\n}};',)) - DUPLOAD_CONF_PATH = os.path.expanduser('~/.dupload.conf') - DUPLOAD_CONF_TMP_PATH = os.path.expanduser('~/.local/tmp_dupload.cnf') - - def __init__(self, dupload_config, login, ssh_key_path): - self.__config = {} - for repo, conf in dupload_config.items(): - d = { - "fqdn": conf["fqdn"], - "method": "scpb", - "login": login, - "incoming": conf["incoming"], - "dinstall_runs": 0, - "options": "-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o ConnectionAttempts=3", - } - d.update(conf) - self.__config[repo] = d - print(self.__config) - self.ssh_key_path = ssh_key_path - - def __enter__(self): - if os.path.exists(self.DUPLOAD_CONF_PATH): - shutil.move(self.DUPLOAD_CONF_PATH, self.DUPLOAD_CONF_TMP_PATH) - self.__dupload_conf = open(self.DUPLOAD_CONF_PATH, 'w') - self.__dupload_conf.write('package config;\n\n$default_host = undef;\n\n' + '\n\n'.join([ - self.DUPLOAD_CONF_TEMPLATE.format(title=title, **values) - for title, values in self.__config.items()])) - self.__dupload_conf.write('\n') - self.__dupload_conf.close() - if self.ssh_key_path: - subprocess.check_call("ssh-add {}".format(self.ssh_key_path), shell=True) - return self - - def __exit__(self, exc_type, exc_val, exc_tb): - if os.path.exists(self.DUPLOAD_CONF_TMP_PATH): - shutil.move(self.DUPLOAD_CONF_TMP_PATH, self.DUPLOAD_CONF_PATH) - else: - os.unlink(self.DUPLOAD_CONF_PATH) - - -class SSHConnection(object): - def __init__(self, user, host, ssh_key=None): - if ssh_key: - key_str = "-i {}".format(ssh_key) - else: - key_str = "" - - self.base_cmd = "ssh {key} {user}@{host}".format( - key=key_str, user=user, host=host) - - def execute(self, cmd): - logging.info("Executing remote cmd %s", cmd) - subprocess.check_call(self.base_cmd + ' "{cmd}"'.format(cmd=cmd), - shell=True) - - -def debsign(path, gpg_passphrase, gpg_sec_key_path, gpg_pub_key_path, gpg_user): - try: - with GpgKey(gpg_sec_key_path, gpg_pub_key_path): - cmd = ('debsign -k \'{key}\' -p"gpg --verbose --no-use-agent --batch ' - '--no-tty --passphrase {passphrase}" {path}/*.changes').format( - key=gpg_user, passphrase=gpg_passphrase, path=path) - logging.info("Build debsign cmd '%s'", cmd) - subprocess.check_call(cmd, shell=True) - logging.info("debsign finished") - except Exception as ex: - logging.error("Cannot debsign packages on path %s, with user key", path) - raise ex - -def rpmsign(path, gpg_passphrase, gpg_sec_key_path, gpg_pub_key_path, gpg_user): - try: - with GpgKey(gpg_sec_key_path, gpg_pub_key_path): - for package in os.listdir(path): - package_path = os.path.join(path, package) - logging.info("Signing %s", package_path) - proc = pexpect.spawn('rpm --resign -D "_signature gpg" -D "_gpg_name {username}" {package}'.format(username=gpg_user, package=package_path)) - proc.expect_exact("Enter pass phrase: ") - proc.sendline(gpg_passphrase) - proc.expect(pexpect.EOF) - logging.info("Signed successfully") - except Exception as ex: - logging.error("Cannot 
rpmsign packages on path %s, with user key", path) - raise ex - -def transfer_packages_scp(ssh_key, path, repo_user, repo_url, incoming_directory): - logging.info("Transferring packages via scp to %s", repo_url) - if ssh_key: - key_str = "-i {}".format(ssh_key) - else: - key_str = "" - subprocess.check_call('scp {key_str} {path}/* {user}@{repo}:{incoming}'.format( - path=path, user=repo_user, repo=repo_url, key_str=key_str, incoming=incoming_directory), shell=True) - logging.info("Transfer via scp finished") - -def transfer_packages_dupload(ssh_key, path, repo_user, repo_url, incoming_directory): - repo_short_name = repo_url.split('.')[0] - config = { - repo_short_name: { - "fqdn": repo_url, - "incoming": incoming_directory, - } - } - with DebRelease(config, repo_user, ssh_key): - logging.info("Duploading") - subprocess.check_call("dupload -f --nomail --to {repo} {path}".format(repo=repo_short_name, path=path), shell=True) - logging.info("Dupload finished") - - -def clear_old_incoming_packages(ssh_connection, user): - for pkg in ('deb', 'rpm', 'tgz'): - for release_type in ('stable', 'testing', 'prestable', 'lts'): - try: - ssh_connection.execute("rm /home/{user}/incoming/clickhouse/{pkg}/{release_type}/*".format( - user=user, pkg=pkg, release_type=release_type)) - except Exception: - logging.info("rm is not required") - - -def _get_incoming_path(repo_url, user=None, pkg_type=None, release_type=None): - if repo_url == 'repo.mirror.yandex.net': - return "/home/{user}/incoming/clickhouse/{pkg}/{release_type}".format( - user=user, pkg=pkg_type, release_type=release_type) - else: - return "/repo/{0}/mini-dinstall/incoming/".format(repo_url.split('.')[0]) - - -def _fix_args(args): - - if args.gpg_sec_key_path and not os.path.isabs(args.gpg_sec_key_path): - args.gpg_sec_key_path = os.path.join(os.getcwd(), args.gpg_sec_key_path) - - if args.gpg_pub_key_path and not os.path.isabs(args.gpg_pub_key_path): - args.gpg_pub_key_path = os.path.join(os.getcwd(), args.gpg_pub_key_path) - - if args.ssh_key_path and not os.path.isabs(args.ssh_key_path): - args.ssh_key_path = os.path.join(os.getcwd(), args.ssh_key_path) - - -if __name__ == "__main__": - logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s') - parser = argparse.ArgumentParser(description="Program to push clickhouse packages to repository") - parser.add_argument('--deb-directory') - parser.add_argument('--rpm-directory') - parser.add_argument('--tgz-directory') - parser.add_argument('--release-type', choices=('testing', 'stable', 'prestable', 'lts'), default='testing') - parser.add_argument('--ssh-key-path') - parser.add_argument('--gpg-passphrase', required=True) - parser.add_argument('--gpg-sec-key-path') - parser.add_argument('--gpg-pub-key-path') - parser.add_argument('--gpg-key-user', default='robot-clickhouse') - parser.add_argument('--repo-url', default='repo.mirror.yandex.net') - parser.add_argument('--repo-user', default='buildfarm') - - args = parser.parse_args() - if args.deb_directory is None and args.rpm_directory is None and args.tgz_directory is None: - parser.error('At least one package directory required') - - _fix_args(args) - - is_open_source = args.repo_url == 'repo.mirror.yandex.net' - ssh_connection = SSHConnection(args.repo_user, args.repo_url, args.ssh_key_path) - - packages = [] - if args.deb_directory: - debsign(args.deb_directory, args.gpg_passphrase, args.gpg_sec_key_path, args.gpg_pub_key_path, args.gpg_key_user) - packages.append((args.deb_directory, 'deb')) - - if args.rpm_directory: - if not 
is_open_source: - raise Exception("Cannot upload .rpm package to {}".format(args.repo_url)) - rpmsign(args.rpm_directory, args.gpg_passphrase, args.gpg_sec_key_path, args.gpg_pub_key_path, args.gpg_key_user) - packages.append((args.rpm_directory, 'rpm')) - - if args.tgz_directory: - if not is_open_source: - raise Exception("Cannot upload .tgz package to {}".format(args.repo_url)) - packages.append((args.tgz_directory, 'tgz')) - - if is_open_source: - logging.info("Clearing old directory with incoming packages on buildfarm") - clear_old_incoming_packages(ssh_connection, args.repo_user) - logging.info("Incoming directory cleared") - - for package_path, package_type in packages: - logging.info("Processing path '%s' with package type %s", package_path, package_type) - incoming_directory = _get_incoming_path(args.repo_url, args.repo_user, package_type, args.release_type) - if package_type == "deb": - transfer_packages_dupload(args.ssh_key_path, package_path, args.repo_user, args.repo_url, incoming_directory) - else: - transfer_packages_scp(args.ssh_key_path, package_path, args.repo_user, args.repo_url, incoming_directory) - - logging.info("Running clickhouse install (it takes about (20-30 minutes)") - ssh_connection.execute("sudo /usr/sbin/ya-clickhouse-{0}-install".format(package_type)) - logging.info("Clickhouse installed") - logging.info("Pushing clickhouse to repo") - ssh_connection.execute("/usr/sbin/push2publicrepo.sh clickhouse") - logging.info("Push finished") - logging.info("Package '%s' pushed", package_type) - else: - transfer_packages_dupload(args.ssh_key_path, args.deb_directory, args.repo_user, args.repo_url, _get_incoming_path(args.repo_url)) diff --git a/utils/s3tools/s3uploader b/utils/s3tools/s3uploader index 0627b19ea8f..33db76f57f4 100755 --- a/utils/s3tools/s3uploader +++ b/utils/s3tools/s3uploader @@ -121,8 +121,8 @@ if __name__ == "__main__": parser = argparse.ArgumentParser( description="Simple tool for uploading datasets to clickhouse S3", usage='%(prog)s [options] {}'.format(USAGE_EXAMPLES)) - parser.add_argument('--s3-api-url', default='s3.mds.yandex.net') - parser.add_argument('--s3-common-url', default='s3.yandex.net') + parser.add_argument('--s3-api-url', default='s3.amazonaws.com') + parser.add_argument('--s3-common-url', default='s3.amazonaws.com') parser.add_argument('--bucket-name', default='clickhouse-datasets') parser.add_argument('--dataset-name', required=True, help='Name of dataset, will be used in uploaded path') diff --git a/utils/zookeeper-adjust-block-numbers-to-parts/main.cpp b/utils/zookeeper-adjust-block-numbers-to-parts/main.cpp index 8550675cb9e..5c694ee04ef 100644 --- a/utils/zookeeper-adjust-block-numbers-to-parts/main.cpp +++ b/utils/zookeeper-adjust-block-numbers-to-parts/main.cpp @@ -214,7 +214,7 @@ try po::options_description desc("Allowed options"); desc.add_options() ("help,h", "show help") - ("zookeeper,z", po::value(), "Addresses of ZooKeeper instances, comma-separated. Example: example01e.yandex.ru:2181") + ("zookeeper,z", po::value(), "Addresses of ZooKeeper instances, comma-separated. Example: example01e.clickhouse.com:2181") ("path,p", po::value(), "[optional] Path of replica queue to insert node (without trailing slash). By default it's /clickhouse/tables") ("shard,s", po::value(), "[optional] Shards to process, comma-separated. If not specified then the utility will process all the shards.") ("table,t", po::value(), "[optional] Tables to process, comma-separated. 
If not specified then the utility will process all the tables.") diff --git a/utils/zookeeper-create-entry-to-download-part/main.cpp b/utils/zookeeper-create-entry-to-download-part/main.cpp index 1e86fe248ab..b92857929b7 100644 --- a/utils/zookeeper-create-entry-to-download-part/main.cpp +++ b/utils/zookeeper-create-entry-to-download-part/main.cpp @@ -11,7 +11,7 @@ try desc.add_options() ("help,h", "produce help message") ("address,a", boost::program_options::value()->required(), - "addresses of ZooKeeper instances, comma separated. Example: example01e.yandex.ru:2181") + "addresses of ZooKeeper instances, comma separated. Example: example01e.clickhouse.com:2181") ("path,p", boost::program_options::value()->required(), "path of replica queue to insert node (without trailing slash)") ("name,n", boost::program_options::value()->required(), "name of part to download") ; diff --git a/utils/zookeeper-dump-tree/main.cpp b/utils/zookeeper-dump-tree/main.cpp index 893056564bb..d85762df640 100644 --- a/utils/zookeeper-dump-tree/main.cpp +++ b/utils/zookeeper-dump-tree/main.cpp @@ -14,7 +14,7 @@ int main(int argc, char ** argv) desc.add_options() ("help,h", "produce help message") ("address,a", boost::program_options::value()->required(), - "addresses of ZooKeeper instances, comma separated. Example: example01e.yandex.ru:2181") + "addresses of ZooKeeper instances, comma separated. Example: example01e.clickhouse.com:2181") ("path,p", boost::program_options::value()->default_value("/"), "where to start") ("ctime,c", "print node ctime") diff --git a/utils/zookeeper-remove-by-list/main.cpp b/utils/zookeeper-remove-by-list/main.cpp index 2c97ffb4a70..cf194bd4861 100644 --- a/utils/zookeeper-remove-by-list/main.cpp +++ b/utils/zookeeper-remove-by-list/main.cpp @@ -12,7 +12,7 @@ try desc.add_options() ("help,h", "produce help message") ("address,a", boost::program_options::value()->required(), - "addresses of ZooKeeper instances, comma separated. Example: example01e.yandex.ru:2181") + "addresses of ZooKeeper instances, comma separated. Example: example01e.clickhouse.com:2181") ; boost::program_options::variables_map options; From ccdd0a60f74e5069efd2c73c332ae281f6c374aa Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 16 Apr 2022 00:28:56 +0200 Subject: [PATCH 14/47] Remove Arcadia --- base/base/phdr_cache.cpp | 2 +- base/base/phdr_cache.h | 2 +- base/daemon/BaseDaemon.h | 8 ++++---- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/base/base/phdr_cache.cpp b/base/base/phdr_cache.cpp index 20a755ed7a4..36a9b4f1f62 100644 --- a/base/base/phdr_cache.cpp +++ b/base/base/phdr_cache.cpp @@ -2,7 +2,7 @@ #pragma clang diagnostic ignored "-Wreserved-identifier" #endif -/// This code was based on the code by Fedor Korotkiy (prime@yandex-team.ru) for YT product in Yandex. +/// This code was based on the code by Fedor Korotkiy https://www.linkedin.com/in/fedor-korotkiy-659a1838/ #include diff --git a/base/base/phdr_cache.h b/base/base/phdr_cache.h index d2854ece0bc..b522710c4c4 100644 --- a/base/base/phdr_cache.h +++ b/base/base/phdr_cache.h @@ -1,6 +1,6 @@ #pragma once -/// This code was based on the code by Fedor Korotkiy (prime@yandex-team.ru) for YT product in Yandex. +/// This code was based on the code by Fedor Korotkiy https://www.linkedin.com/in/fedor-korotkiy-659a1838/ /** Collects all dl_phdr_info items and caches them in a static array. 
* Also rewrites dl_iterate_phdr with a lock-free version which consults the above cache diff --git a/base/daemon/BaseDaemon.h b/base/daemon/BaseDaemon.h index 54a74369dce..152a431922c 100644 --- a/base/daemon/BaseDaemon.h +++ b/base/daemon/BaseDaemon.h @@ -76,10 +76,10 @@ public: /// return none if daemon doesn't exist, reference to the daemon otherwise static std::optional> tryGetInstance() { return tryGetInstance(); } - /// В Graphite компоненты пути(папки) разделяются точкой. - /// У нас принят путь формата root_path.hostname_yandex_ru.key - /// root_path по умолчанию one_min - /// key - лучше группировать по смыслу. Например "meminfo.cached" или "meminfo.free", "meminfo.total" + /// A Graphite metric name consists of components separated by dots. + /// We used to have the following format: root_path.hostname_clickhouse_com.key + /// root_path - one_min by default + /// key - it is better to group keys by meaning. Examples: "meminfo.cached" or "meminfo.free", "meminfo.total". template void writeToGraphite(const std::string & key, const T & value, const std::string & config_name = DEFAULT_GRAPHITE_CONFIG_NAME, time_t timestamp = 0, const std::string & custom_root_path = "") { From 40357637ec356ec7e65145424a48743b19c1a4ca Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 16 Apr 2022 00:33:32 +0200 Subject: [PATCH 15/47] Remove Arcadia --- src/Common/tests/gtest_DateLUTImpl.cpp | 5 +- .../gtest_DateTime64_parsing_and_writing.cpp | 122 ------------------ utils/check-style/check-style | 5 - 3 files changed, 2 insertions(+), 130 deletions(-) diff --git a/src/Common/tests/gtest_DateLUTImpl.cpp b/src/Common/tests/gtest_DateLUTImpl.cpp index 1eec83a6ec9..be96c190be3 100644 --- a/src/Common/tests/gtest_DateLUTImpl.cpp +++ b/src/Common/tests/gtest_DateLUTImpl.cpp @@ -90,10 +90,10 @@ TEST(DateLUTTest, makeDayNumTest) TEST(DateLUTTest, TimeValuesInMiddleOfRange) { - const DateLUTImpl & lut = DateLUT::instance("Europe/Minsk"); + const DateLUTImpl & lut = DateLUT::instance("Asia/Istanbul"); const time_t time = 1568650811; // 2019-09-16 19:20:11 (Monday) - EXPECT_EQ(lut.getTimeZone(), "Europe/Minsk"); + EXPECT_EQ(lut.getTimeZone(), "Asia/Istanbul"); EXPECT_EQ(lut.getOffsetAtStartOfEpoch(), 3600*3); // UTC-3 EXPECT_EQ(lut.toDate(time), 1568581200); @@ -538,4 +538,3 @@ INSTANTIATE_TEST_SUITE_P(AllTimezones_Year1970, // {0, 0 + 11 * 3600 * 24 + 12, 11}, })) ); - diff --git a/src/IO/tests/gtest_DateTime64_parsing_and_writing.cpp b/src/IO/tests/gtest_DateTime64_parsing_and_writing.cpp index c6208af2d5e..b2fd78fa8c2 100644 --- a/src/IO/tests/gtest_DateTime64_parsing_and_writing.cpp +++ b/src/IO/tests/gtest_DateTime64_parsing_and_writing.cpp @@ -76,125 +76,3 @@ TEST_P(DateTime64StringParseBestEffortTest, parse) EXPECT_EQ(param.dt64, actual); } - - -// YYYY-MM-DD HH:MM:SS.NNNNNNNNN -INSTANTIATE_TEST_SUITE_P(Basic, - DateTime64StringParseTest, - ::testing::ValuesIn(std::initializer_list{ - { - "When subsecond part is missing from string it is set to zero.", - "2019-09-16 19:20:17", - 1568650817'000, - 3, - DateLUT::instance("Europe/Minsk") - }, - { - "When subsecond part is present in string, but it is zero, it is set to zero.", - "2019-09-16 19:20:17.0", - 1568650817'000, - 3, - DateLUT::instance("Europe/Minsk") - }, - { - "When scale is 0, subsecond part is not set.", - "2019-09-16 19:20:17", - 1568650817ULL, - 0, - DateLUT::instance("Europe/Minsk") - }, - { - "When scale is 0, subsecond part is 0 despite being present in string.", - "2019-09-16 19:20:17.123", - 1568650817ULL, - 0, - DateLUT::instance("Europe/Minsk") -
}, - { - "When subsecond part is present in string, it is set correctly to DateTime64 value of scale 3.", - "2019-09-16 19:20:17.123", - 1568650817'123, - 3, - DateLUT::instance("Europe/Minsk") - }, - { - "When subsecond part is present in string (and begins with 0), it is set correctly to DateTime64 value of scale 3.", - "2019-09-16 19:20:17.012", - 1568650817'012, - 3, - DateLUT::instance("Europe/Minsk") - }, - { - "When subsecond part scale is smaller than DateTime64 scale, subsecond part is properly adjusted (as if padded from right with zeroes).", - "2019-09-16 19:20:17.123", - 1568650817'12300ULL, - 5, - DateLUT::instance("Europe/Minsk") - }, - { - "When subsecond part scale is larger than DateTime64 scale, subsecond part is truncated.", - "2019-09-16 19:20:17.123", - 1568650817'1ULL, - 1, - DateLUT::instance("Europe/Minsk") - } - }) -); - -INSTANTIATE_TEST_SUITE_P(BestEffort, - DateTime64StringParseBestEffortTest, - ::testing::ValuesIn(std::initializer_list{ - { - "When subsecond part is unreasonably large, it truncated to given scale", - "2019-09-16 19:20:17.12345678910111213141516171819202122233435363738393031323334353637383940414243444546474849505152535455565758596061626364", - 1568650817'123456ULL, - 6, - DateLUT::instance("Europe/Minsk") - } - }) -); - - -// TODO: add negative test cases for invalid strings, verifying that error is reported properly - -INSTANTIATE_TEST_SUITE_P(Basic, - DateTime64StringWriteTest, - ::testing::ValuesIn(std::initializer_list{ - { - "non-zero subsecond part on DateTime64 with scale of 3", - "2019-09-16 19:20:17.123", - 1568650817'123, - 3, - DateLUT::instance("Europe/Minsk") - }, - { - "non-zero subsecond part on DateTime64 with scale of 5", - "2019-09-16 19:20:17.12345", - 1568650817'12345ULL, - 5, - DateLUT::instance("Europe/Minsk") - }, - { - "Zero subsecond part is written to string", - "2019-09-16 19:20:17.000", - 1568650817'000ULL, - 3, - DateLUT::instance("Europe/Minsk") - }, - { - "When scale is 0, subsecond part (and separtor) is missing from string", - "2019-09-16 19:20:17", - 1568650817ULL, - 0, - DateLUT::instance("Europe/Minsk") - }, - { - "Subsecond part with leading zeroes is written to string correctly", - "2019-09-16 19:20:17.001", - 1568650817'001ULL, - 3, - DateLUT::instance("Europe/Minsk") - } - }) -); - diff --git a/utils/check-style/check-style b/utils/check-style/check-style index 6ebf53cb932..20954781fbd 100755 --- a/utils/check-style/check-style +++ b/utils/check-style/check-style @@ -147,11 +147,6 @@ find $ROOT_PATH -not -path $ROOT_PATH'/contrib*' \( -name '*.yaml' -or -name '*. grep -vP $EXCLUDE_DIRS | xargs yamllint --config-file=$ROOT_PATH/.yamllint -# Machine translation to Russian is strictly prohibited -find $ROOT_PATH/docs/ru -name '*.md' | - grep -vP $EXCLUDE_DIRS | - xargs grep -l -F 'machine_translated: true' - # Tests should not be named with "fail" in their names. It makes looking at the results less convenient. 
find $ROOT_PATH/tests/queries -iname '*fail*' | grep -vP $EXCLUDE_DIRS | From 69b91c5410dc5d1153be96b2985a5b78462028e4 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 16 Apr 2022 02:08:06 +0200 Subject: [PATCH 16/47] Simplification #36313 --- src/Common/getNumberOfPhysicalCPUCores.cpp | 34 +++++++--------------- 1 file changed, 10 insertions(+), 24 deletions(-) diff --git a/src/Common/getNumberOfPhysicalCPUCores.cpp b/src/Common/getNumberOfPhysicalCPUCores.cpp index f415ee58e85..0df7a83cd53 100644 --- a/src/Common/getNumberOfPhysicalCPUCores.cpp +++ b/src/Common/getNumberOfPhysicalCPUCores.cpp @@ -5,9 +5,6 @@ # include # include #endif -#if USE_CPUID -# include -#endif #include @@ -41,32 +38,21 @@ static unsigned getCGroupLimitedCPUCores(unsigned default_cpu_count) static unsigned getNumberOfPhysicalCPUCoresImpl() { - unsigned cpu_count = 0; // start with an invalid num + unsigned cpu_count = std::thread::hardware_concurrency(); -#if USE_CPUID - cpu_raw_data_t raw_data; - cpu_id_t data; + /// Most x86_64 CPUs have 2-way Hyper-Threading. + /// Aarch64 and RISC-V don't have SMT so far. + /// POWER has SMT, and it can be multi-way (like 8-way), but we don't know how ClickHouse really behaves on it, so we use all of them. - /// On Xen VMs, libcpuid returns wrong info (zero number of cores). Fallback to alternative method. - /// Also, libcpuid does not support some CPUs like AMD Hygon C86 7151. - /// Also, libcpuid gives strange result on Google Compute Engine VMs. - /// Example: - /// num_cores = 12, /// number of physical cores on current CPU socket - /// total_logical_cpus = 1, /// total number of logical cores on all sockets - /// num_logical_cpus = 24. /// number of logical cores on current CPU socket - /// It means two-way hyper-threading (24 / 12), but contradictory, 'total_logical_cpus' == 1. - - if (0 == cpuid_get_raw_data(&raw_data) && 0 == cpu_identify(&raw_data, &data) && data.num_logical_cpus != 0) - cpu_count = data.num_cores * data.total_logical_cpus / data.num_logical_cpus; +#if defined(__x86_64__) + /// Let's limit ourselves to the number of physical cores. + /// But if the number of logical cores is small, it may be a small machine + /// or a very limited cloud instance, and it is reasonable to use all the cores. + if (cpu_count >= 8) + cpu_count /= 2; #endif - /// As a fallback (also for non-x86 architectures) assume there are no hyper-threading on the system. - /// (Actually, only Aarch64 is supported).
- if (cpu_count == 0) - cpu_count = std::thread::hardware_concurrency(); - #if defined(OS_LINUX) - /// TODO: add a setting for disabling that, similar to UseContainerSupport in java cpu_count = getCGroupLimitedCPUCores(cpu_count); #endif From 33ffdcaa4f21f6611eeea8b72c0cf71971baf86d Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 16 Apr 2022 02:18:22 +0200 Subject: [PATCH 17/47] Return two changes --- src/Common/tests/gtest_DateLUTImpl.cpp | 5 +- .../gtest_DateTime64_parsing_and_writing.cpp | 122 ++++++++++++++++++ 2 files changed, 125 insertions(+), 2 deletions(-) diff --git a/src/Common/tests/gtest_DateLUTImpl.cpp b/src/Common/tests/gtest_DateLUTImpl.cpp index be96c190be3..1eec83a6ec9 100644 --- a/src/Common/tests/gtest_DateLUTImpl.cpp +++ b/src/Common/tests/gtest_DateLUTImpl.cpp @@ -90,10 +90,10 @@ TEST(DateLUTTest, makeDayNumTest) TEST(DateLUTTest, TimeValuesInMiddleOfRange) { - const DateLUTImpl & lut = DateLUT::instance("Asia/Istanbul"); + const DateLUTImpl & lut = DateLUT::instance("Europe/Minsk"); const time_t time = 1568650811; // 2019-09-16 19:20:11 (Monday) - EXPECT_EQ(lut.getTimeZone(), "Asia/Istanbul"); + EXPECT_EQ(lut.getTimeZone(), "Europe/Minsk"); EXPECT_EQ(lut.getOffsetAtStartOfEpoch(), 3600*3); // UTC-3 EXPECT_EQ(lut.toDate(time), 1568581200); @@ -538,3 +538,4 @@ INSTANTIATE_TEST_SUITE_P(AllTimezones_Year1970, // {0, 0 + 11 * 3600 * 24 + 12, 11}, })) ); + diff --git a/src/IO/tests/gtest_DateTime64_parsing_and_writing.cpp b/src/IO/tests/gtest_DateTime64_parsing_and_writing.cpp index b2fd78fa8c2..c6208af2d5e 100644 --- a/src/IO/tests/gtest_DateTime64_parsing_and_writing.cpp +++ b/src/IO/tests/gtest_DateTime64_parsing_and_writing.cpp @@ -76,3 +76,125 @@ TEST_P(DateTime64StringParseBestEffortTest, parse) EXPECT_EQ(param.dt64, actual); } + + +// YYYY-MM-DD HH:MM:SS.NNNNNNNNN +INSTANTIATE_TEST_SUITE_P(Basic, + DateTime64StringParseTest, + ::testing::ValuesIn(std::initializer_list{ + { + "When subsecond part is missing from string it is set to zero.", + "2019-09-16 19:20:17", + 1568650817'000, + 3, + DateLUT::instance("Europe/Minsk") + }, + { + "When subsecond part is present in string, but it is zero, it is set to zero.", + "2019-09-16 19:20:17.0", + 1568650817'000, + 3, + DateLUT::instance("Europe/Minsk") + }, + { + "When scale is 0, subsecond part is not set.", + "2019-09-16 19:20:17", + 1568650817ULL, + 0, + DateLUT::instance("Europe/Minsk") + }, + { + "When scale is 0, subsecond part is 0 despite being present in string.", + "2019-09-16 19:20:17.123", + 1568650817ULL, + 0, + DateLUT::instance("Europe/Minsk") + }, + { + "When subsecond part is present in string, it is set correctly to DateTime64 value of scale 3.", + "2019-09-16 19:20:17.123", + 1568650817'123, + 3, + DateLUT::instance("Europe/Minsk") + }, + { + "When subsecond part is present in string (and begins with 0), it is set correctly to DateTime64 value of scale 3.", + "2019-09-16 19:20:17.012", + 1568650817'012, + 3, + DateLUT::instance("Europe/Minsk") + }, + { + "When subsecond part scale is smaller than DateTime64 scale, subsecond part is properly adjusted (as if padded from right with zeroes).", + "2019-09-16 19:20:17.123", + 1568650817'12300ULL, + 5, + DateLUT::instance("Europe/Minsk") + }, + { + "When subsecond part scale is larger than DateTime64 scale, subsecond part is truncated.", + "2019-09-16 19:20:17.123", + 1568650817'1ULL, + 1, + DateLUT::instance("Europe/Minsk") + } + }) +); + +INSTANTIATE_TEST_SUITE_P(BestEffort, + DateTime64StringParseBestEffortTest, + 
::testing::ValuesIn(std::initializer_list{ + { + "When subsecond part is unreasonably large, it is truncated to the given scale", + "2019-09-16 19:20:17.12345678910111213141516171819202122233435363738393031323334353637383940414243444546474849505152535455565758596061626364", + 1568650817'123456ULL, + 6, + DateLUT::instance("Europe/Minsk") + } + }) +); + + +// TODO: add negative test cases for invalid strings, verifying that error is reported properly + +INSTANTIATE_TEST_SUITE_P(Basic, + DateTime64StringWriteTest, + ::testing::ValuesIn(std::initializer_list{ + { + "non-zero subsecond part on DateTime64 with scale of 3", + "2019-09-16 19:20:17.123", + 1568650817'123, + 3, + DateLUT::instance("Europe/Minsk") + }, + { + "non-zero subsecond part on DateTime64 with scale of 5", + "2019-09-16 19:20:17.12345", + 1568650817'12345ULL, + 5, + DateLUT::instance("Europe/Minsk") + }, + { + "Zero subsecond part is written to string", + "2019-09-16 19:20:17.000", + 1568650817'000ULL, + 3, + DateLUT::instance("Europe/Minsk") + }, + { + "When scale is 0, subsecond part (and separator) is missing from string", + "2019-09-16 19:20:17", + 1568650817ULL, + 0, + DateLUT::instance("Europe/Minsk") + }, + { + "Subsecond part with leading zeroes is written to string correctly", + "2019-09-16 19:20:17.001", + 1568650817'001ULL, + 3, + DateLUT::instance("Europe/Minsk") + } + }) +); + From fa46e0d22fb28f62a7125f680c35b7a25edb8b9f Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 16 Apr 2022 02:54:21 +0200 Subject: [PATCH 18/47] Add an option for build profiling --- CMakeLists.txt | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index dad9a25ab26..4f1a6c05730 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -314,6 +314,15 @@ if (ENABLE_BUILD_PATH_MAPPING) set (CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} -ffile-prefix-map=${CMAKE_SOURCE_DIR}=.") endif () +option (ENABLE_BUILD_PROFILING "Enable profiling of build time" OFF) +if (ENABLE_BUILD_PROFILING) + if (COMPILER_CLANG) + set (COMPILER_FLAGS "${COMPILER_FLAGS} -ftime-trace") + else () + message (${RECONFIGURE_MESSAGE_LEVEL} "Build profiling is only available with Clang") + endif () +endif () + if (${CMAKE_VERSION} VERSION_LESS "3.12.4") # CMake < 3.12 doesn't support setting 20 as a C++ standard version. # We will add C++ standard controlling flag in CMAKE_CXX_FLAGS manually for now.
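A usage sketch for the option added in the patch above; the build directory, generator, and trace viewers named below are illustrative assumptions, not part of the patch:

# Hypothetical invocation: configure with build-time profiling enabled, then build as usual.
mkdir -p build && cd build
cmake -DENABLE_BUILD_PROFILING=ON ..
ninja
# With clang's -ftime-trace, each object file gets a sibling .json trace
# that can be opened in a trace viewer such as chrome://tracing or speedscope.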
From 294efeccfe7532fe1c29052b7a908e245a0a76ec Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 16 Apr 2022 01:15:40 +0200 Subject: [PATCH 19/47] Fix clang-tidy-14 (part 1) --- contrib/sysroot | 2 +- src/Access/DiskAccessStorage.cpp | 3 ++- .../AggregateFunctionAvgWeighted.cpp | 4 ++-- src/AggregateFunctions/AggregateFunctionDeltaSum.cpp | 2 +- .../AggregateFunctionGroupArray.cpp | 2 +- .../AggregateFunctionGroupBitmap.cpp | 2 +- .../AggregateFunctionGroupUniqArray.cpp | 2 +- src/AggregateFunctions/AggregateFunctionQuantile.cpp | 4 ++-- src/AggregateFunctions/AggregateFunctionSparkbar.cpp | 2 +- .../AggregateFunctionStatisticsSimple.cpp | 2 +- src/AggregateFunctions/AggregateFunctionSum.cpp | 2 +- src/AggregateFunctions/AggregateFunctionSumCount.cpp | 2 +- src/AggregateFunctions/AggregateFunctionTopK.cpp | 2 +- src/Common/UTF8Helpers.cpp | 2 +- src/Dictionaries/IPAddressDictionary.cpp | 12 ++++++------ src/Functions/FunctionHelpers.cpp | 2 +- src/Functions/FunctionsLogical.cpp | 6 +++--- src/Functions/array/arrayElement.cpp | 2 +- src/Functions/if.cpp | 6 +++--- src/Functions/toLowCardinality.cpp | 1 - src/Interpreters/JIT/compileFunction.cpp | 2 +- src/Interpreters/convertFieldToType.cpp | 12 ++++++------ src/Parsers/Access/ASTShowAccessEntitiesQuery.cpp | 3 ++- src/Storages/FileLog/StorageFileLog.cpp | 2 +- .../MergeTree/MergeTreeDataSelectExecutor.cpp | 2 +- src/Storages/MergeTree/MergeTreeDataWriter.cpp | 2 +- src/Storages/MergeTree/MergeTreeIndexBloomFilter.cpp | 2 +- .../MergeTree/MergeTreeIndexGranuleBloomFilter.cpp | 2 +- src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp | 2 +- 29 files changed, 46 insertions(+), 45 deletions(-) diff --git a/contrib/sysroot b/contrib/sysroot index bbcac834526..e9fb375d0a1 160000 --- a/contrib/sysroot +++ b/contrib/sysroot @@ -1 +1 @@ -Subproject commit bbcac834526d90d1e764164b861be426891d1743 +Subproject commit e9fb375d0a1e5ebfd74c043f088f2342552103f8 diff --git a/src/Access/DiskAccessStorage.cpp b/src/Access/DiskAccessStorage.cpp index 7393fcd8d36..a9eb27c291c 100644 --- a/src/Access/DiskAccessStorage.cpp +++ b/src/Access/DiskAccessStorage.cpp @@ -327,7 +327,8 @@ void DiskAccessStorage::scheduleWriteLists(AccessEntityType type) /// Create the 'need_rebuild_lists.mark' file. /// This file will be used later to find out if writing lists is successful or not. - std::ofstream{getNeedRebuildListsMarkFilePath(directory_path)}; + std::ofstream out{getNeedRebuildListsMarkFilePath(directory_path)}; + out.close(); lists_writing_thread = ThreadFromGlobalPool{&DiskAccessStorage::listsWritingThreadFunc, this}; lists_writing_thread_is_waiting = true; diff --git a/src/AggregateFunctions/AggregateFunctionAvgWeighted.cpp b/src/AggregateFunctions/AggregateFunctionAvgWeighted.cpp index ab6fdc8fd7e..4d7901a7fac 100644 --- a/src/AggregateFunctions/AggregateFunctionAvgWeighted.cpp +++ b/src/AggregateFunctions/AggregateFunctionAvgWeighted.cpp @@ -39,7 +39,7 @@ bool allowTypes(const DataTypePtr& left, const DataTypePtr& right) noexcept } template -static IAggregateFunction * create(const IDataType & second_type, TArgs && ... args) +IAggregateFunction * create(const IDataType & second_type, TArgs && ... args) { const WhichDataType which(second_type); @@ -51,7 +51,7 @@ static IAggregateFunction * create(const IDataType & second_type, TArgs && ... a // Not using helper functions because there are no templates for binary decimal/numeric function. template -static IAggregateFunction * create(const IDataType & first_type, const IDataType & second_type, TArgs && ... 
args) +IAggregateFunction * create(const IDataType & first_type, const IDataType & second_type, TArgs && ... args) { const WhichDataType which(first_type); diff --git a/src/AggregateFunctions/AggregateFunctionDeltaSum.cpp b/src/AggregateFunctions/AggregateFunctionDeltaSum.cpp index f1c6e7c6112..3b43d9a85f8 100644 --- a/src/AggregateFunctions/AggregateFunctionDeltaSum.cpp +++ b/src/AggregateFunctions/AggregateFunctionDeltaSum.cpp @@ -30,7 +30,7 @@ AggregateFunctionPtr createAggregateFunctionDeltaSum( throw Exception("Incorrect number of arguments for aggregate function " + name, ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - DataTypePtr data_type = arguments[0]; + const DataTypePtr & data_type = arguments[0]; if (isInteger(data_type) || isFloat(data_type)) return AggregateFunctionPtr(createWithNumericType( diff --git a/src/AggregateFunctions/AggregateFunctionGroupArray.cpp b/src/AggregateFunctions/AggregateFunctionGroupArray.cpp index 5a9fd778277..85075d5a4d6 100644 --- a/src/AggregateFunctions/AggregateFunctionGroupArray.cpp +++ b/src/AggregateFunctions/AggregateFunctionGroupArray.cpp @@ -20,7 +20,7 @@ namespace { template