From ef81f3313f01389da3fed91563a6d42084741281 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Mon, 6 May 2024 20:58:04 +0200 Subject: [PATCH 01/19] Optimize ActionsDAG::updateHeader --- src/Interpreters/ActionsDAG.cpp | 32 ++++++++++++++++++-------------- src/Interpreters/ActionsDAG.h | 2 +- 2 files changed, 19 insertions(+), 15 deletions(-) diff --git a/src/Interpreters/ActionsDAG.cpp b/src/Interpreters/ActionsDAG.cpp index 30eb908330b..91e75dca2e1 100644 --- a/src/Interpreters/ActionsDAG.cpp +++ b/src/Interpreters/ActionsDAG.cpp @@ -708,27 +708,28 @@ static ColumnWithTypeAndName executeActionForPartialResult(const ActionsDAG::Nod return res_column; } -Block ActionsDAG::updateHeader(Block header) const +Block ActionsDAG::updateHeader(const Block & header) const { IntermediateExecutionResult node_to_column; std::set pos_to_remove; { - std::unordered_map> input_positions; + size_t out = inputs.size(); /// Always out of range + std::unordered_map input_positions; + input_positions.reserve(inputs.size()); for (size_t pos = 0; pos < inputs.size(); ++pos) - input_positions[inputs[pos]->result_name].emplace_back(pos); + input_positions[inputs[pos]->result_name] = pos; for (size_t pos = 0; pos < header.columns(); ++pos) { const auto & col = header.getByPosition(pos); auto it = input_positions.find(col.name); - if (it != input_positions.end() && !it->second.empty()) + if (it != input_positions.end() && it->second != out) { - auto & list = it->second; pos_to_remove.insert(pos); - node_to_column[inputs[list.front()]] = col; - list.pop_front(); + node_to_column[inputs[it->second]] = col; + it->second = out; } } } @@ -746,18 +747,21 @@ Block ActionsDAG::updateHeader(Block header) const throw; } - if (isInputProjected()) - header.clear(); - else - header.erase(pos_to_remove); Block res; - + res.reserve(result_columns.size()); for (auto & col : result_columns) res.insert(std::move(col)); - for (auto && item : header) - res.insert(std::move(item)); + if (isInputProjected()) + return res; + + res.reserve(header.columns() - pos_to_remove.size()); + for (size_t i = 0; i < header.columns(); i++) + { + if (!pos_to_remove.contains(i)) + res.insert(header.data[i]); + } return res; } diff --git a/src/Interpreters/ActionsDAG.h b/src/Interpreters/ActionsDAG.h index 8c0bcf8fdc0..64e6a0a7998 100644 --- a/src/Interpreters/ActionsDAG.h +++ b/src/Interpreters/ActionsDAG.h @@ -272,7 +272,7 @@ public: /// /// In addition, check that result constants are constants according to DAG. /// In case if function return constant, but arguments are not constant, materialize it. - Block updateHeader(Block header) const; + Block updateHeader(const Block & header) const; using IntermediateExecutionResult = std::unordered_map; static ColumnsWithTypeAndName evaluatePartialResult( From f34a47d3a670ae70494d5ed9028c108139d3f12d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Mon, 6 May 2024 20:58:22 +0200 Subject: [PATCH 02/19] Stop clang-format trying to remove branches in for loops --- .clang-format | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.clang-format b/.clang-format index 52e01497187..7746106fcb5 100644 --- a/.clang-format +++ b/.clang-format @@ -89,7 +89,7 @@ PenaltyBreakFirstLessLess: 120 PenaltyBreakString: 1000 PenaltyExcessCharacter: 1000000 PenaltyReturnTypeOnItsOwnLine: 60 -RemoveBracesLLVM: true +RemoveBracesLLVM: false SpaceAfterCStyleCast: false SpaceBeforeAssignmentOperators: true SpaceBeforeParens: ControlStatements From 36e23d67903057302f3832cfc4d731ecb45fd4ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Mon, 6 May 2024 20:58:44 +0200 Subject: [PATCH 03/19] Remove expensive logging in query stages --- src/Planner/Planner.cpp | 10 ++++++---- .../00002_log_and_exception_messages_formatting.sql | 2 +- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/src/Planner/Planner.cpp b/src/Planner/Planner.cpp index ce022cb0572..b40e23a9553 100644 --- a/src/Planner/Planner.cpp +++ b/src/Planner/Planner.cpp @@ -1229,8 +1229,9 @@ void Planner::buildQueryPlanIfNeeded() if (query_plan.isInitialized()) return; - LOG_TRACE(getLogger("Planner"), "Query {} to stage {}{}", - query_tree->formatConvertedASTForErrorMessage(), + LOG_TRACE( + getLogger("Planner"), + "Query to stage {}{}", QueryProcessingStage::toString(select_query_options.to_stage), select_query_options.only_analyze ? " only analyze" : ""); @@ -1506,8 +1507,9 @@ void Planner::buildPlanForQueryNode() auto & mapping = join_tree_query_plan.query_node_to_plan_step_mapping; query_node_to_plan_step_mapping.insert(mapping.begin(), mapping.end()); - LOG_TRACE(getLogger("Planner"), "Query {} from stage {} to stage {}{}", - query_tree->formatConvertedASTForErrorMessage(), + LOG_TRACE( + getLogger("Planner"), + "Query from stage {} to stage {}{}", QueryProcessingStage::toString(from_stage), QueryProcessingStage::toString(select_query_options.to_stage), select_query_options.only_analyze ? " only analyze" : ""); diff --git a/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.sql b/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.sql index 573321cfee1..bb32def5be9 100644 --- a/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.sql +++ b/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.sql @@ -210,7 +210,7 @@ select with 0.16 as threshold select 'noisy Trace messages', - greatest(coalesce(((select message_format_string, count() from logs where level = 'Trace' and message_format_string not in ('Access granted: {}{}', '{} -> {}', 'Query {} to stage {}{}', 'Query {} from stage {} to stage {}{}') + greatest(coalesce(((select message_format_string, count() from logs where level = 'Trace' and message_format_string not in ('Access granted: {}{}', '{} -> {}', 'Query to stage {}{}', 'Query from stage {} to stage {}{}') group by message_format_string order by count() desc limit 1) as top_message).2, 0) / (select count() from logs), threshold) as r, r <= threshold ? '' : top_message.1; From 85096f037e8b7824bea805d802202286d567b188 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Tue, 7 May 2024 12:02:23 +0200 Subject: [PATCH 04/19] updateHeader: Needs to keep track of all the matches --- src/Interpreters/ActionsDAG.cpp | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/src/Interpreters/ActionsDAG.cpp b/src/Interpreters/ActionsDAG.cpp index 91e75dca2e1..dda5e79d63b 100644 --- a/src/Interpreters/ActionsDAG.cpp +++ b/src/Interpreters/ActionsDAG.cpp @@ -21,6 +21,9 @@ #include #include +#include +#include + namespace DB { @@ -714,22 +717,24 @@ Block ActionsDAG::updateHeader(const Block & header) const std::set pos_to_remove; { - size_t out = inputs.size(); /// Always out of range - std::unordered_map input_positions; - input_positions.reserve(inputs.size()); + using inline_vector = absl::InlinedVector; // 64B, holding max 7 size_t elements inlined + absl::flat_hash_map input_positions; - for (size_t pos = 0; pos < inputs.size(); ++pos) - input_positions[inputs[pos]->result_name] = pos; + /// We insert from last to first in the inlinedVector so it's easier to pop_back matches later + for (size_t pos = inputs.size(); pos != 0; pos--) + input_positions[inputs[pos - 1]->result_name].emplace_back(pos - 1); for (size_t pos = 0; pos < header.columns(); ++pos) { const auto & col = header.getByPosition(pos); auto it = input_positions.find(col.name); - if (it != input_positions.end() && it->second != out) + if (it != input_positions.end() && !it->second.empty()) { pos_to_remove.insert(pos); - node_to_column[inputs[it->second]] = col; - it->second = out; + + auto & v = it->second; + node_to_column[inputs[v.back()]] = col; + v.pop_back(); } } } From 124b696bb224936210c14f944aaa241eb206e45a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Tue, 7 May 2024 12:02:41 +0200 Subject: [PATCH 05/19] Reduce excessive logging from joins --- src/Processors/Transforms/JoiningTransform.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Processors/Transforms/JoiningTransform.cpp b/src/Processors/Transforms/JoiningTransform.cpp index 0c0a86ce270..3e2a9462e54 100644 --- a/src/Processors/Transforms/JoiningTransform.cpp +++ b/src/Processors/Transforms/JoiningTransform.cpp @@ -14,12 +14,12 @@ namespace ErrorCodes Block JoiningTransform::transformHeader(Block header, const JoinPtr & join) { - LOG_DEBUG(getLogger("JoiningTransform"), "Before join block: '{}'", header.dumpStructure()); + LOG_TEST(getLogger("JoiningTransform"), "Before join block: '{}'", header.dumpStructure()); join->checkTypesOfKeys(header); join->initialize(header); ExtraBlockPtr tmp; join->joinBlock(header, tmp); - LOG_DEBUG(getLogger("JoiningTransform"), "After join block: '{}'", header.dumpStructure()); + LOG_TEST(getLogger("JoiningTransform"), "After join block: '{}'", header.dumpStructure()); return header; } From c652d2e0b3fef12706255d23aadd1c0bd46156fb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Tue, 7 May 2024 22:22:30 +0200 Subject: [PATCH 06/19] Fix useful warning by clang tidy --- src/Processors/Transforms/ExpressionTransform.cpp | 4 ++-- src/Processors/Transforms/ExpressionTransform.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Processors/Transforms/ExpressionTransform.cpp b/src/Processors/Transforms/ExpressionTransform.cpp index 0d3341b000c..2fbd2c21b8d 100644 --- a/src/Processors/Transforms/ExpressionTransform.cpp +++ b/src/Processors/Transforms/ExpressionTransform.cpp @@ -3,9 +3,9 @@ namespace DB { -Block ExpressionTransform::transformHeader(Block header, const ActionsDAG & expression) +Block ExpressionTransform::transformHeader(const Block & header, const ActionsDAG & expression) { - return expression.updateHeader(std::move(header)); + return expression.updateHeader(header); } diff --git a/src/Processors/Transforms/ExpressionTransform.h b/src/Processors/Transforms/ExpressionTransform.h index 791c7d7ba73..cd2aae044d5 100644 --- a/src/Processors/Transforms/ExpressionTransform.h +++ b/src/Processors/Transforms/ExpressionTransform.h @@ -24,7 +24,7 @@ public: String getName() const override { return "ExpressionTransform"; } - static Block transformHeader(Block header, const ActionsDAG & expression); + static Block transformHeader(const Block & header, const ActionsDAG & expression); protected: void transform(Chunk & chunk) override; From c178539f391dfdcdf42d94a9a7bd0929b44bfdef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Wed, 8 May 2024 14:39:30 +0200 Subject: [PATCH 07/19] Raise log to trace --- src/Processors/Transforms/JoiningTransform.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Processors/Transforms/JoiningTransform.cpp b/src/Processors/Transforms/JoiningTransform.cpp index 3e2a9462e54..36bd64b6083 100644 --- a/src/Processors/Transforms/JoiningTransform.cpp +++ b/src/Processors/Transforms/JoiningTransform.cpp @@ -14,12 +14,12 @@ namespace ErrorCodes Block JoiningTransform::transformHeader(Block header, const JoinPtr & join) { - LOG_TEST(getLogger("JoiningTransform"), "Before join block: '{}'", header.dumpStructure()); + LOG_TRACE(getLogger("JoiningTransform"), "Before join block: '{}'", header.dumpStructure()); join->checkTypesOfKeys(header); join->initialize(header); ExtraBlockPtr tmp; join->joinBlock(header, tmp); - LOG_TEST(getLogger("JoiningTransform"), "After join block: '{}'", header.dumpStructure()); + LOG_TRACE(getLogger("JoiningTransform"), "After join block: '{}'", header.dumpStructure()); return header; } From 0e2b2ab53a1c040cdffee50da5c6d8272b9bbc15 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Thu, 9 May 2024 12:53:34 +0200 Subject: [PATCH 08/19] Reduce join logging again --- src/Interpreters/HashJoin.cpp | 9 +++++++-- src/Processors/Transforms/JoiningTransform.cpp | 4 ++-- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/src/Interpreters/HashJoin.cpp b/src/Interpreters/HashJoin.cpp index 9b05edbce36..61d07681c89 100644 --- a/src/Interpreters/HashJoin.cpp +++ b/src/Interpreters/HashJoin.cpp @@ -2427,10 +2427,15 @@ HashJoin::~HashJoin() { if (!data) { - LOG_TRACE(log, "{}Join data has been already released", instance_log_id); + LOG_TEST(log, "{}Join data has been already released", instance_log_id); return; } - LOG_TRACE(log, "{}Join data is being destroyed, {} bytes and {} rows in hash table", instance_log_id, getTotalByteCount(), getTotalRowCount()); + LOG_TEST( + log, + "{}Join data is being destroyed, {} bytes and {} rows in hash table", + instance_log_id, + getTotalByteCount(), + getTotalRowCount()); } template diff --git a/src/Processors/Transforms/JoiningTransform.cpp b/src/Processors/Transforms/JoiningTransform.cpp index 36bd64b6083..3e2a9462e54 100644 --- a/src/Processors/Transforms/JoiningTransform.cpp +++ b/src/Processors/Transforms/JoiningTransform.cpp @@ -14,12 +14,12 @@ namespace ErrorCodes Block JoiningTransform::transformHeader(Block header, const JoinPtr & join) { - LOG_TRACE(getLogger("JoiningTransform"), "Before join block: '{}'", header.dumpStructure()); + LOG_TEST(getLogger("JoiningTransform"), "Before join block: '{}'", header.dumpStructure()); join->checkTypesOfKeys(header); join->initialize(header); ExtraBlockPtr tmp; join->joinBlock(header, tmp); - LOG_TRACE(getLogger("JoiningTransform"), "After join block: '{}'", header.dumpStructure()); + LOG_TEST(getLogger("JoiningTransform"), "After join block: '{}'", header.dumpStructure()); return header; } From e31c54878e019c1dda96b599ada8a5f1ca0c99d3 Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Wed, 15 May 2024 09:34:08 +0200 Subject: [PATCH 09/19] Improve documentation for from_env and from_zk - Add a new paragraph to make it possible to find - Add documentation for default values when using these attributes --- docs/en/operations/configuration-files.md | 55 +++++++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/docs/en/operations/configuration-files.md b/docs/en/operations/configuration-files.md index 089704705d0..0675e0edcb6 100644 --- a/docs/en/operations/configuration-files.md +++ b/docs/en/operations/configuration-files.md @@ -67,6 +67,8 @@ generates merged configuration file: ``` +### Using from_env and from_zk + To specify that a value of an element should be replaced by the value of an environment variable, you can use attribute `from_env`. Example with `$MAX_QUERY_SIZE = 150000`: @@ -93,6 +95,59 @@ which is equal to ``` +The same is possible using `from_zk`: + +``` xml + + + +``` + +``` +# clickhouse-keeper-client +/ :) touch /zk_configs +/ :) create /zk_configs/postgresql_port "9005" +/ :) get /zk_configs/postgresql_port +9005 +``` + +which is equal to + + +``` xml + + 9005 + +``` + +#### Default values for from_env and from_zk attributes + +It's possible to set the default value and substitute it only if the environment variable or zookeeper node is set using `replace="1"`. + +With previous example, but `MAX_QUERY_SIZE` is unset: + +``` xml + + + + 150000 + + + +``` + +will take the default value + +``` xml + + + + 150000 + + + +``` + ## Substituting Configuration {#substitution} The config can define substitutions. There are two types of substitutions: From 9744feb95e7665b59a19344982ceea5f5b8afdff Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Wed, 15 May 2024 11:02:10 +0200 Subject: [PATCH 10/19] Fix deadlock in parallel read buffer --- src/IO/ParallelReadBuffer.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/IO/ParallelReadBuffer.cpp b/src/IO/ParallelReadBuffer.cpp index 5718830db64..e6771235a8e 100644 --- a/src/IO/ParallelReadBuffer.cpp +++ b/src/IO/ParallelReadBuffer.cpp @@ -73,8 +73,9 @@ bool ParallelReadBuffer::addReaderToPool() auto worker = read_workers.emplace_back(std::make_shared(input, range_start, size)); - ++active_working_readers; schedule([this, my_worker = std::move(worker)]() mutable { readerThreadFunction(std::move(my_worker)); }, Priority{}); + /// increase number of workers only after we are sure that the reader was scheduled + ++active_working_readers; return true; } From 28ee7244ce3d387d68db2ad181b4baf27020ee27 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Wed, 15 May 2024 12:51:34 +0300 Subject: [PATCH 11/19] JOIN filter push down equivalent columns fix --- .../Optimizations/filterPushDown.cpp | 8 +- ...ter_push_down_equivalent_columns.reference | 91 +++++++++++++++++++ ...in_filter_push_down_equivalent_columns.sql | 33 +++++++ 3 files changed, 126 insertions(+), 6 deletions(-) create mode 100644 tests/queries/0_stateless/03152_join_filter_push_down_equivalent_columns.reference create mode 100644 tests/queries/0_stateless/03152_join_filter_push_down_equivalent_columns.sql diff --git a/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp b/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp index 5b3bcfc4468..8ca240b3e8b 100644 --- a/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp +++ b/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp @@ -262,10 +262,6 @@ static size_t tryPushDownOverJoinStep(QueryPlan::Node * parent_node, QueryPlan:: { const auto & left_table_key_name = join_clause.key_names_left[i]; const auto & right_table_key_name = join_clause.key_names_right[i]; - - if (!join_header.has(left_table_key_name) || !join_header.has(right_table_key_name)) - continue; - const auto & left_table_column = left_stream_input_header.getByName(left_table_key_name); const auto & right_table_column = right_stream_input_header.getByName(right_table_key_name); @@ -338,9 +334,9 @@ static size_t tryPushDownOverJoinStep(QueryPlan::Node * parent_node, QueryPlan:: auto join_filter_push_down_actions = filter->getExpression()->splitActionsForJOINFilterPushDown(filter->getFilterColumnName(), filter->removesFilterColumn(), left_stream_available_columns_to_push_down, - left_stream_input_header.getColumnsWithTypeAndName(), + left_stream_input_header, right_stream_available_columns_to_push_down, - right_stream_input_header.getColumnsWithTypeAndName(), + right_stream_input_header, equivalent_columns_to_push_down, equivalent_left_stream_column_to_right_stream_column, equivalent_right_stream_column_to_left_stream_column); diff --git a/tests/queries/0_stateless/03152_join_filter_push_down_equivalent_columns.reference b/tests/queries/0_stateless/03152_join_filter_push_down_equivalent_columns.reference new file mode 100644 index 00000000000..7058d36aaf9 --- /dev/null +++ b/tests/queries/0_stateless/03152_join_filter_push_down_equivalent_columns.reference @@ -0,0 +1,91 @@ +-- { echoOn } + +EXPLAIN header = 1, indexes = 1 +SELECT name FROM users INNER JOIN users2 USING name WHERE users.name ='Alice'; +Expression ((Project names + (Projection + ))) +Header: name String + Join (JOIN FillRightFirst) + Header: __table1.name String + Filter (( + Change column names to column identifiers)) + Header: __table1.name String + ReadFromMergeTree (default.users) + Header: name String + Indexes: + PrimaryKey + Keys: + name + Condition: (name in [\'Alice\', \'Alice\']) + Parts: 1/3 + Granules: 1/3 + Filter (( + Change column names to column identifiers)) + Header: __table2.name String + ReadFromMergeTree (default.users2) + Header: name String + Indexes: + PrimaryKey + Keys: + name + Condition: (name in [\'Alice\', \'Alice\']) + Parts: 1/3 + Granules: 1/3 +SELECT '--'; +-- +EXPLAIN header = 1, indexes = 1 +SELECT name FROM users LEFT JOIN users2 USING name WHERE users.name ='Alice'; +Expression ((Project names + (Projection + ))) +Header: name String + Join (JOIN FillRightFirst) + Header: __table1.name String + Filter (( + Change column names to column identifiers)) + Header: __table1.name String + ReadFromMergeTree (default.users) + Header: name String + Indexes: + PrimaryKey + Keys: + name + Condition: (name in [\'Alice\', \'Alice\']) + Parts: 1/3 + Granules: 1/3 + Filter (( + Change column names to column identifiers)) + Header: __table2.name String + ReadFromMergeTree (default.users2) + Header: name String + Indexes: + PrimaryKey + Keys: + name + Condition: (name in [\'Alice\', \'Alice\']) + Parts: 1/3 + Granules: 1/3 +SELECT '--'; +-- +EXPLAIN header = 1, indexes = 1 +SELECT name FROM users RIGHT JOIN users2 USING name WHERE users2.name ='Alice'; +Expression ((Project names + (Projection + ))) +Header: name String + Join (JOIN FillRightFirst) + Header: __table1.name String + __table2.name String + Filter (( + Change column names to column identifiers)) + Header: __table1.name String + ReadFromMergeTree (default.users) + Header: name String + Indexes: + PrimaryKey + Keys: + name + Condition: (name in [\'Alice\', \'Alice\']) + Parts: 1/3 + Granules: 1/3 + Filter (( + Change column names to column identifiers)) + Header: __table2.name String + ReadFromMergeTree (default.users2) + Header: name String + Indexes: + PrimaryKey + Keys: + name + Condition: (name in [\'Alice\', \'Alice\']) + Parts: 1/3 + Granules: 1/3 diff --git a/tests/queries/0_stateless/03152_join_filter_push_down_equivalent_columns.sql b/tests/queries/0_stateless/03152_join_filter_push_down_equivalent_columns.sql new file mode 100644 index 00000000000..0d365c94bce --- /dev/null +++ b/tests/queries/0_stateless/03152_join_filter_push_down_equivalent_columns.sql @@ -0,0 +1,33 @@ +DROP TABLE IF EXISTS users; +CREATE TABLE users (uid Int16, name String, age Int16) ENGINE=MergeTree order by (uid, name); + +INSERT INTO users VALUES (1231, 'John', 33); +INSERT INTO users VALUES (6666, 'Ksenia', 48); +INSERT INTO users VALUES (8888, 'Alice', 50); + +DROP TABLE IF EXISTS users2; +CREATE TABLE users2 (uid Int16, name String, age2 Int16) ENGINE=MergeTree order by (uid, name); + +INSERT INTO users2 VALUES (1231, 'John', 33); +INSERT INTO users2 VALUES (6666, 'Ksenia', 48); +INSERT INTO users2 VALUES (8888, 'Alice', 50); + +-- { echoOn } + +EXPLAIN header = 1, indexes = 1 +SELECT name FROM users INNER JOIN users2 USING name WHERE users.name ='Alice'; + +SELECT '--'; + +EXPLAIN header = 1, indexes = 1 +SELECT name FROM users LEFT JOIN users2 USING name WHERE users.name ='Alice'; + +SELECT '--'; + +EXPLAIN header = 1, indexes = 1 +SELECT name FROM users RIGHT JOIN users2 USING name WHERE users2.name ='Alice'; + +-- { echoOff } + +DROP TABLE users; +DROP TABLE users2; From d4b358179240da39de59cd05569ccf9aa8e1d4da Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Wed, 15 May 2024 12:08:16 +0200 Subject: [PATCH 12/19] Allow allocation during job data destructor call --- src/Common/ThreadPool.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/Common/ThreadPool.cpp b/src/Common/ThreadPool.cpp index b9029d9287d..e10ffe90fbd 100644 --- a/src/Common/ThreadPool.cpp +++ b/src/Common/ThreadPool.cpp @@ -5,7 +5,6 @@ #include #include -#include #include #include @@ -437,6 +436,11 @@ void ThreadPoolImpl::worker(typename std::list::iterator thread_ /// We don't run jobs after `shutdown` is set, but we have to properly dequeue all jobs and finish them. if (shutdown) { + { + ALLOW_ALLOCATIONS_IN_SCOPE; + /// job can contain packaged_task which can set exception during destruction + job_data.reset(); + } job_is_done = true; continue; } From 0d00f73a1f89f478c5d0e364db02f92e7632ad96 Mon Sep 17 00:00:00 2001 From: Alexander Sapin Date: Wed, 15 May 2024 14:37:38 +0200 Subject: [PATCH 13/19] Disable test for specialed intel codec on arm --- tests/integration/test_non_default_compression/test.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tests/integration/test_non_default_compression/test.py b/tests/integration/test_non_default_compression/test.py index 18e2eb43813..a14a319b16b 100644 --- a/tests/integration/test_non_default_compression/test.py +++ b/tests/integration/test_non_default_compression/test.py @@ -2,7 +2,7 @@ import random import string import pytest -from helpers.cluster import ClickHouseCluster +from helpers.cluster import ClickHouseCluster, is_arm cluster = ClickHouseCluster(__file__) @@ -62,6 +62,11 @@ def start_cluster(): def test_preconfigured_default_codec(start_cluster): + if is_arm(): + pytest.skip( + "Skipping test because it's special test for Intel code (doesn't work on ARM)" + ) + for node in [node1, node2]: node.query( """ From 80f39c44d3ac1b6150d6c0952f3e4209fc971c4c Mon Sep 17 00:00:00 2001 From: vdimir Date: Wed, 15 May 2024 15:15:15 +0000 Subject: [PATCH 14/19] Remove unnecessary logging statements in MergeJoinTransform.cpp --- src/Processors/Transforms/MergeJoinTransform.cpp | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/src/Processors/Transforms/MergeJoinTransform.cpp b/src/Processors/Transforms/MergeJoinTransform.cpp index 92f4110e9ed..159a3244fe9 100644 --- a/src/Processors/Transforms/MergeJoinTransform.cpp +++ b/src/Processors/Transforms/MergeJoinTransform.cpp @@ -338,8 +338,6 @@ static void prepareChunk(Chunk & chunk) void MergeJoinAlgorithm::initialize(Inputs inputs) { - LOG_DEBUG(&Poco::Logger::get("XXXX"), "{}:{}: {} - '{}'", __FILE__, __LINE__, 0, inputs[0].chunk.dumpStructure()); - LOG_DEBUG(&Poco::Logger::get("XXXX"), "{}:{}: {} - '{}'", __FILE__, __LINE__, 1, inputs[1].chunk.dumpStructure()); if (inputs.size() != 2) throw Exception(ErrorCodes::LOGICAL_ERROR, "Two inputs are required, got {}", inputs.size()); @@ -351,8 +349,6 @@ void MergeJoinAlgorithm::initialize(Inputs inputs) void MergeJoinAlgorithm::consume(Input & input, size_t source_num) { - LOG_DEBUG(&Poco::Logger::get("XXXX"), "{}:{}: {} - '{}'", __FILE__, __LINE__, source_num, input.chunk.dumpStructure()); - if (input.skip_last_row) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "skip_last_row is not supported"); @@ -816,15 +812,9 @@ IMergingAlgorithm::Status MergeJoinAlgorithm::merge() if (!cursors[1]->cursor.isValid() && !cursors[1]->fullyCompleted()) return Status(1); - for (size_t i = 0; i < 2; ++i) - { - LOG_DEBUG(&Poco::Logger::get("XXXX"), "{}:{}: sampleColumns {} '{}'", __FILE__, __LINE__, i, cursors[i]->sampleBlock().dumpStructure()); - } - if (auto result = handleAllJoinState()) { - LOG_DEBUG(&Poco::Logger::get("XXXX"), "{}:{}: '{}'", __FILE__, __LINE__, result ? result->chunk.dumpStructure() : "NA"); return std::move(*result); } From bcd154a7c7bef56d4fc8db06deccd69be1025175 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Wed, 15 May 2024 20:09:29 +0300 Subject: [PATCH 15/19] Fixed tests --- .../03152_join_filter_push_down_equivalent_columns.sql | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/queries/0_stateless/03152_join_filter_push_down_equivalent_columns.sql b/tests/queries/0_stateless/03152_join_filter_push_down_equivalent_columns.sql index 0d365c94bce..645e89034d7 100644 --- a/tests/queries/0_stateless/03152_join_filter_push_down_equivalent_columns.sql +++ b/tests/queries/0_stateless/03152_join_filter_push_down_equivalent_columns.sql @@ -1,3 +1,5 @@ +SET allow_experimental_analyzer = 1; + DROP TABLE IF EXISTS users; CREATE TABLE users (uid Int16, name String, age Int16) ENGINE=MergeTree order by (uid, name); From 8f5c8fdb27c2528b3ff4d0bad31b7047ec690949 Mon Sep 17 00:00:00 2001 From: Alexander Sapin Date: Wed, 15 May 2024 19:14:32 +0200 Subject: [PATCH 16/19] Proper fix --- tests/integration/test_non_default_compression/test.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/integration/test_non_default_compression/test.py b/tests/integration/test_non_default_compression/test.py index a14a319b16b..187cae5c957 100644 --- a/tests/integration/test_non_default_compression/test.py +++ b/tests/integration/test_non_default_compression/test.py @@ -62,11 +62,6 @@ def start_cluster(): def test_preconfigured_default_codec(start_cluster): - if is_arm(): - pytest.skip( - "Skipping test because it's special test for Intel code (doesn't work on ARM)" - ) - for node in [node1, node2]: node.query( """ @@ -260,6 +255,11 @@ def test_uncompressed_cache_plus_zstd_codec(start_cluster): def test_preconfigured_deflateqpl_codec(start_cluster): + if is_arm(): + pytest.skip( + "Skipping test because it's special test for Intel code (doesn't work on ARM)" + ) + node6.query( """ CREATE TABLE compression_codec_multiple_with_key ( From 8d1977718b04ca86eb807c79fd52ba294edce54f Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Wed, 15 May 2024 20:00:34 +0200 Subject: [PATCH 17/19] Fix some settings values in 02455_one_row_from_csv_memory_usage test to make it less flaky --- .../0_stateless/02455_one_row_from_csv_memory_usage.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/02455_one_row_from_csv_memory_usage.sh b/tests/queries/0_stateless/02455_one_row_from_csv_memory_usage.sh index 5b54666a6a3..05de3f05562 100755 --- a/tests/queries/0_stateless/02455_one_row_from_csv_memory_usage.sh +++ b/tests/queries/0_stateless/02455_one_row_from_csv_memory_usage.sh @@ -9,7 +9,7 @@ USER_FILES_PATH=$($CLICKHOUSE_CLIENT --query "select _path,_file from file('none cp "$CUR_DIR"/data_csv/10m_rows.csv.xz $USER_FILES_PATH/ -${CLICKHOUSE_CLIENT} --query="SELECT * FROM file('10m_rows.csv.xz' , 'CSVWithNames') order by identifier, number, name, surname, birthday LIMIT 1 settings max_threads=1, max_memory_usage=1000000000" -${CLICKHOUSE_CLIENT} --query="SELECT * FROM file('10m_rows.csv.xz' , 'CSVWithNames') order by identifier, number, name, surname, birthday LIMIT 1 settings max_threads=1, max_memory_usage=100000000" +${CLICKHOUSE_CLIENT} --query="SELECT * FROM file('10m_rows.csv.xz' , 'CSVWithNames') order by identifier, number, name, surname, birthday LIMIT 1 settings input_format_parallel_parsing=1, max_threads=1, max_parsing_threads=16, min_chunk_bytes_for_parallel_parsing=10485760, max_memory_usage=1000000000" +${CLICKHOUSE_CLIENT} --query="SELECT * FROM file('10m_rows.csv.xz' , 'CSVWithNames') order by identifier, number, name, surname, birthday LIMIT 1 settings input_format_parallel_parsing=1, max_threads=1, max_parsing_threads=16, min_chunk_bytes_for_parallel_parsing=10485760, max_memory_usage=100000000" rm $USER_FILES_PATH/10m_rows.csv.xz From 2165cc318d3bacabf1b67f67d275509a4328dde1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Mon, 13 May 2024 20:47:25 +0200 Subject: [PATCH 18/19] More helpful optimizations found by clang-tidy --- .clang-tidy | 2 ++ .../QueryPlan/SourceStepWithFilter.cpp | 4 ++-- src/Processors/Transforms/FilterTransform.cpp | 16 ++++++---------- src/Processors/Transforms/FilterTransform.h | 7 ++----- .../Transforms/TotalsHavingTransform.cpp | 2 +- 5 files changed, 13 insertions(+), 18 deletions(-) diff --git a/.clang-tidy b/.clang-tidy index d53047f0552..e2f318562ec 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -138,6 +138,8 @@ Checks: [ # This is a good check, but clang-tidy crashes, see https://github.com/llvm/llvm-project/issues/91872 '-modernize-use-constraints', + # https://github.com/abseil/abseil-cpp/issues/1667 + '-clang-analyzer-optin.core.EnumCastOutOfRange' ] WarningsAsErrors: '*' diff --git a/src/Processors/QueryPlan/SourceStepWithFilter.cpp b/src/Processors/QueryPlan/SourceStepWithFilter.cpp index 5893c2aeb4f..ce5a59a92f9 100644 --- a/src/Processors/QueryPlan/SourceStepWithFilter.cpp +++ b/src/Processors/QueryPlan/SourceStepWithFilter.cpp @@ -21,7 +21,7 @@ Block SourceStepWithFilter::applyPrewhereActions(Block block, const PrewhereInfo { if (prewhere_info->row_level_filter) { - block = prewhere_info->row_level_filter->updateHeader(std::move(block)); + block = prewhere_info->row_level_filter->updateHeader(block); auto & row_level_column = block.getByName(prewhere_info->row_level_column_name); if (!row_level_column.type->canBeUsedInBooleanContext()) { @@ -36,7 +36,7 @@ Block SourceStepWithFilter::applyPrewhereActions(Block block, const PrewhereInfo if (prewhere_info->prewhere_actions) { - block = prewhere_info->prewhere_actions->updateHeader(std::move(block)); + block = prewhere_info->prewhere_actions->updateHeader(block); auto & prewhere_column = block.getByName(prewhere_info->prewhere_column_name); if (!prewhere_column.type->canBeUsedInBooleanContext()) diff --git a/src/Processors/Transforms/FilterTransform.cpp b/src/Processors/Transforms/FilterTransform.cpp index b3be9246f43..0793bb3db5b 100644 --- a/src/Processors/Transforms/FilterTransform.cpp +++ b/src/Processors/Transforms/FilterTransform.cpp @@ -174,26 +174,22 @@ static std::unique_ptr combineFilterAndIndices( } Block FilterTransform::transformHeader( - Block header, - const ActionsDAG * expression, - const String & filter_column_name, - bool remove_filter_column) + const Block & header, const ActionsDAG * expression, const String & filter_column_name, bool remove_filter_column) { - if (expression) - header = expression->updateHeader(std::move(header)); + Block result = expression ? expression->updateHeader(header) : header; - auto filter_type = header.getByName(filter_column_name).type; + auto filter_type = result.getByName(filter_column_name).type; if (!filter_type->onlyNull() && !isUInt8(removeNullable(removeLowCardinality(filter_type)))) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER, "Illegal type {} of column {} for filter. Must be UInt8 or Nullable(UInt8).", filter_type->getName(), filter_column_name); if (remove_filter_column) - header.erase(filter_column_name); + result.erase(filter_column_name); else - replaceFilterToConstant(header, filter_column_name); + replaceFilterToConstant(result, filter_column_name); - return header; + return result; } FilterTransform::FilterTransform( diff --git a/src/Processors/Transforms/FilterTransform.h b/src/Processors/Transforms/FilterTransform.h index bb72b72d619..23c694eed0b 100644 --- a/src/Processors/Transforms/FilterTransform.h +++ b/src/Processors/Transforms/FilterTransform.h @@ -22,11 +22,8 @@ public: const Block & header_, ExpressionActionsPtr expression_, String filter_column_name_, bool remove_filter_column_, bool on_totals_ = false, std::shared_ptr> rows_filtered_ = nullptr); - static Block transformHeader( - Block header, - const ActionsDAG * expression, - const String & filter_column_name, - bool remove_filter_column); + static Block + transformHeader(const Block & header, const ActionsDAG * expression, const String & filter_column_name, bool remove_filter_column); String getName() const override { return "FilterTransform"; } diff --git a/src/Processors/Transforms/TotalsHavingTransform.cpp b/src/Processors/Transforms/TotalsHavingTransform.cpp index 578d8cb8374..aa86879e62c 100644 --- a/src/Processors/Transforms/TotalsHavingTransform.cpp +++ b/src/Processors/Transforms/TotalsHavingTransform.cpp @@ -49,7 +49,7 @@ Block TotalsHavingTransform::transformHeader( if (expression) { - block = expression->updateHeader(std::move(block)); + block = expression->updateHeader(block); if (remove_filter) block.erase(filter_column_name); } From 5dbc1f347e9f00e951623553b5a56e974d874fd8 Mon Sep 17 00:00:00 2001 From: Alex Katsman Date: Wed, 15 May 2024 19:37:42 +0000 Subject: [PATCH 19/19] Fix logs test for binary data by converting it to a valid UTF8 string. --- tests/clickhouse-test | 16 ++++++++-------- ...002_log_and_exception_messages_formatting.sql | 4 ++-- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index cbf0a6a577f..133d635f8a0 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -2549,15 +2549,15 @@ def reportLogStats(args): WITH 240 AS mins, ( - SELECT (count(), sum(length(message))) + SELECT (count(), sum(length(toValidUTF8(message)))) FROM system.text_log WHERE (now() - toIntervalMinute(mins)) < event_time ) AS total SELECT count() AS count, round(count / (total.1), 3) AS `count_%`, - formatReadableSize(sum(length(message))) AS size, - round(sum(length(message)) / (total.2), 3) AS `size_%`, + formatReadableSize(sum(length(toValidUTF8(message)))) AS size, + round(sum(length(toValidUTF8(message))) / (total.2), 3) AS `size_%`, countDistinct(logger_name) AS uniq_loggers, countDistinct(thread_id) AS uniq_threads, groupArrayDistinct(toString(level)) AS levels, @@ -2580,8 +2580,8 @@ def reportLogStats(args): 240 AS mins SELECT count() AS count, - substr(replaceRegexpAll(message, '[^A-Za-z]+', ''), 1, 32) AS pattern, - substr(any(message), 1, 256) as runtime_message, + substr(replaceRegexpAll(toValidUTF8(message), '[^A-Za-z]+', ''), 1, 32) AS pattern, + substr(any(toValidUTF8(message)), 1, 256) as runtime_message, any((extract(source_file, '/[a-zA-Z0-9_]+\\.[a-z]+'), source_line)) as line FROM system.text_log WHERE (now() - toIntervalMinute(mins)) < event_time AND message_format_string = '' @@ -2596,7 +2596,7 @@ def reportLogStats(args): print("\n") query = """ - SELECT message_format_string, count(), any(message) AS any_message + SELECT message_format_string, count(), any(toValidUTF8(message)) AS any_message FROM system.text_log WHERE (now() - toIntervalMinute(240)) < event_time AND (message NOT LIKE (replaceRegexpAll(message_format_string, '{[:.0-9dfx]*}', '%') AS s)) @@ -2631,8 +2631,8 @@ def reportLogStats(args): 'Unknown identifier: ''{}''', 'User name is empty', 'Expected function, got: {}', 'Attempt to read after eof', 'String size is too big ({}), maximum: {}' ) AS known_short_messages - SELECT count() AS c, message_format_string, substr(any(message), 1, 120), - min(if(length(regexpExtract(message, '(.*)\\([A-Z0-9_]+\\)')) as prefix_len > 0, prefix_len, length(message)) - 26 AS length_without_exception_boilerplate) AS min_length_without_exception_boilerplate + SELECT count() AS c, message_format_string, substr(any(toValidUTF8(message)), 1, 120), + min(if(length(regexpExtract(toValidUTF8(message), '(.*)\\([A-Z0-9_]+\\)')) as prefix_len > 0, prefix_len, length(toValidUTF8(message))) - 26 AS length_without_exception_boilerplate) AS min_length_without_exception_boilerplate FROM system.text_log WHERE (now() - toIntervalMinute(240)) < event_time AND (length(message_format_string) < 16 diff --git a/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.sql b/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.sql index 8e84d731592..ce900fb1741 100644 --- a/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.sql +++ b/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.sql @@ -194,7 +194,7 @@ select 'exceptions shorter than 30', (uniqExact(message_format_string) as c) <= max_messages, c <= max_messages ? [] : groupUniqArray(message_format_string) from logs - where message ilike '%DB::Exception%' and if(length(extract(message, '(.*)\\([A-Z0-9_]+\\)')) as pref > 0, pref, length(message)) < 30 + 26 and message_format_string not in known_short_messages; + where message ilike '%DB::Exception%' and if(length(extract(toValidUTF8(message), '(.*)\\([A-Z0-9_]+\\)')) as pref > 0, pref, length(toValidUTF8(message))) < 30 + 26 and message_format_string not in known_short_messages; -- Avoid too noisy messages: top 1 message frequency must be less than 30%. We should reduce the threshold WITH 0.30 as threshold @@ -252,7 +252,7 @@ select 'number of noisy messages', -- Each message matches its pattern (returns 0 rows) -- Note: maybe we should make it stricter ('Code:%Exception: '||s||'%'), but it's not easy because of addMessage select 'incorrect patterns', greatest(uniqExact(message_format_string), 15) from ( - select message_format_string, any(message) as any_message from logs + select message_format_string, any(toValidUTF8(message)) as any_message from logs where ((rand() % 8) = 0) and message not like (replaceRegexpAll(message_format_string, '{[:.0-9dfx]*}', '%') as s) and message not like (s || ' (skipped % similar messages)')