From 99456d2fa630455ebf1a2422b5e827ec056f4130 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Fri, 14 Jun 2024 17:21:15 +0200 Subject: [PATCH 01/23] Fix unexpeced size of low cardinality column in functions --- src/Functions/IFunction.cpp | 101 ++++++++++++++++++++++++------------ 1 file changed, 68 insertions(+), 33 deletions(-) diff --git a/src/Functions/IFunction.cpp b/src/Functions/IFunction.cpp index 31695fc95d5..9217071ca11 100644 --- a/src/Functions/IFunction.cpp +++ b/src/Functions/IFunction.cpp @@ -50,43 +50,78 @@ bool allArgumentsAreConstants(const ColumnsWithTypeAndName & args) ColumnPtr replaceLowCardinalityColumnsByNestedAndGetDictionaryIndexes( ColumnsWithTypeAndName & args, bool can_be_executed_on_default_arguments, size_t input_rows_count) { - size_t num_rows = input_rows_count; + /// We return the LC indexes so the LC can be reconstructed with the function result ColumnPtr indexes; - /// Find first LowCardinality column and replace it to nested dictionary. - for (auto & column : args) + size_t number_low_cardinality_columns = 0; + size_t last_low_cardinality = 0; + size_t number_const_columns = 0; + size_t number_full_columns = 0; + + for (size_t i = 0; i < args.size(); i++) { - if (const auto * low_cardinality_column = checkAndGetColumn(column.column.get())) + auto const & arg = args[i]; + if (checkAndGetColumn(arg.column.get())) { - /// Single LowCardinality column is supported now. - if (indexes) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected single dictionary argument for function."); - - const auto * low_cardinality_type = checkAndGetDataType(column.type.get()); - - if (!low_cardinality_type) - throw Exception(ErrorCodes::LOGICAL_ERROR, - "Incompatible type for LowCardinality column: {}", - column.type->getName()); - - if (can_be_executed_on_default_arguments) - { - /// Normal case, when function can be executed on values' default. - column.column = low_cardinality_column->getDictionary().getNestedColumn(); - indexes = low_cardinality_column->getIndexesPtr(); - } - else - { - /// Special case when default value can't be used. Example: 1 % LowCardinality(Int). - /// LowCardinality always contains default, so 1 % 0 will throw exception in normal case. - auto dict_encoded = low_cardinality_column->getMinimalDictionaryEncodedColumn(0, low_cardinality_column->size()); - column.column = dict_encoded.dictionary; - indexes = dict_encoded.indexes; - } - - num_rows = column.column->size(); - column.type = low_cardinality_type->getDictionaryType(); + number_low_cardinality_columns++; + last_low_cardinality = i; } + else if (checkAndGetColumn(arg.column.get())) + number_const_columns++; + else + number_full_columns++; + } + + if (!number_low_cardinality_columns && !number_const_columns) + return nullptr; + + if (number_full_columns > 0 || number_low_cardinality_columns > 1) + { + /// If there is a single full column, we can't replace the LC column with its dictionary, as it won't match + /// the size or order of the full columns. Same if there are 2 or more low cardinality columns + for (auto & arg : args) + { + if (const auto * column_lc = checkAndGetColumn(arg.column.get())) + { + arg.column = recursiveRemoveLowCardinality(arg.column); + chassert(arg.column->size() == input_rows_count); + + const auto * low_cardinality_type = checkAndGetDataType(arg.type.get()); + if (!low_cardinality_type) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Incompatible type for LowCardinality column: {}", arg.type->getName()); + arg.type = recursiveRemoveLowCardinality(arg.type); + } + } + } + else if (number_low_cardinality_columns == 1) + { + auto & lc_arg = args[last_low_cardinality]; + + const auto * low_cardinality_type = checkAndGetDataType(lc_arg.type.get()); + if (!low_cardinality_type) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Incompatible type for LowCardinality column: {}", lc_arg.type->getName()); + + const auto * low_cardinality_column = checkAndGetColumn(lc_arg.column.get()); + chassert(low_cardinality_column); + + if (can_be_executed_on_default_arguments) + { + /// Normal case, when function can be executed on values' default. + lc_arg.column = low_cardinality_column->getDictionary().getNestedColumn(); + indexes = low_cardinality_column->getIndexesPtr(); + } + else + { + /// Special case when default value can't be used. Example: 1 % LowCardinality(Int). + /// LowCardinality always contains default, so 1 % 0 will throw exception in normal case. + auto dict_encoded = low_cardinality_column->getMinimalDictionaryEncodedColumn(0, low_cardinality_column->size()); + lc_arg.column = dict_encoded.dictionary; + indexes = dict_encoded.indexes; + } + + /// The new column will have a different number of rows, normally less but occasionally it might be more (NULL) + input_rows_count = lc_arg.column->size(); + lc_arg.type = low_cardinality_type->getDictionaryType(); } /// Change size of constants. @@ -94,7 +129,7 @@ ColumnPtr replaceLowCardinalityColumnsByNestedAndGetDictionaryIndexes( { if (const auto * column_const = checkAndGetColumn(column.column.get())) { - column.column = ColumnConst::create(recursiveRemoveLowCardinality(column_const->getDataColumnPtr()), num_rows); + column.column = ColumnConst::create(recursiveRemoveLowCardinality(column_const->getDataColumnPtr()), input_rows_count); column.type = recursiveRemoveLowCardinality(column.type); } } From 1c9df94a0a0b6fc9eb7ef302b47ac909c029e345 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Thu, 27 Jun 2024 21:26:39 +0200 Subject: [PATCH 02/23] Replace fix with LOGICAL_ERROR --- src/Functions/IFunction.cpp | 24 ++++++++---------------- 1 file changed, 8 insertions(+), 16 deletions(-) diff --git a/src/Functions/IFunction.cpp b/src/Functions/IFunction.cpp index 9217071ca11..76ae8f33fbd 100644 --- a/src/Functions/IFunction.cpp +++ b/src/Functions/IFunction.cpp @@ -47,6 +47,9 @@ bool allArgumentsAreConstants(const ColumnsWithTypeAndName & args) return true; } +/// Replaces single low cardinality column in a function call by its dictionary +/// This can only happen after the arguments have been adapted in IFunctionOverloadResolver::getReturnType +/// as it's only possible if there is one low cardinality column and, optionally, const columns ColumnPtr replaceLowCardinalityColumnsByNestedAndGetDictionaryIndexes( ColumnsWithTypeAndName & args, bool can_be_executed_on_default_arguments, size_t input_rows_count) { @@ -77,21 +80,8 @@ ColumnPtr replaceLowCardinalityColumnsByNestedAndGetDictionaryIndexes( if (number_full_columns > 0 || number_low_cardinality_columns > 1) { - /// If there is a single full column, we can't replace the LC column with its dictionary, as it won't match - /// the size or order of the full columns. Same if there are 2 or more low cardinality columns - for (auto & arg : args) - { - if (const auto * column_lc = checkAndGetColumn(arg.column.get())) - { - arg.column = recursiveRemoveLowCardinality(arg.column); - chassert(arg.column->size() == input_rows_count); - - const auto * low_cardinality_type = checkAndGetDataType(arg.type.get()); - if (!low_cardinality_type) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Incompatible type for LowCardinality column: {}", arg.type->getName()); - arg.type = recursiveRemoveLowCardinality(arg.type); - } - } + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected low cardinality types found. Low cardinality: {}. Full {}. Const {}", + number_low_cardinality_columns, number_full_columns, number_const_columns); } else if (number_low_cardinality_columns == 1) { @@ -124,7 +114,7 @@ ColumnPtr replaceLowCardinalityColumnsByNestedAndGetDictionaryIndexes( lc_arg.type = low_cardinality_type->getDictionaryType(); } - /// Change size of constants. + /// Change size of constants for (auto & column : args) { if (const auto * column_const = checkAndGetColumn(column.column.get())) @@ -305,6 +295,8 @@ ColumnPtr IExecutableFunction::executeWithoutSparseColumns(const ColumnsWithType bool can_be_executed_on_default_arguments = canBeExecutedOnDefaultArguments(); const auto & dictionary_type = res_low_cardinality_type->getDictionaryType(); + /// The arguments should have been adapted in IFunctionOverloadResolver::getReturnType + /// So there is only one low cardinality column (and optionally some const columns) and no full column ColumnPtr indexes = replaceLowCardinalityColumnsByNestedAndGetDictionaryIndexes( columns_without_low_cardinality, can_be_executed_on_default_arguments, input_rows_count); From c185d60375dd0fbc18d062197875ba463326f44a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Thu, 27 Jun 2024 21:59:14 +0200 Subject: [PATCH 03/23] Try an ugly fix --- src/Planner/PlannerActionsVisitor.cpp | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/src/Planner/PlannerActionsVisitor.cpp b/src/Planner/PlannerActionsVisitor.cpp index 7a12d5d690d..13e96dc7016 100644 --- a/src/Planner/PlannerActionsVisitor.cpp +++ b/src/Planner/PlannerActionsVisitor.cpp @@ -485,16 +485,28 @@ public: return node; } - const ActionsDAG::Node * addConstantIfNecessary(const std::string & node_name, const ColumnWithTypeAndName & column) + [[nodiscard]] String addConstantIfNecessary(const std::string & node_name, const ColumnWithTypeAndName & column) { auto it = node_name_to_node.find(node_name); + if (it != node_name_to_node.end() && it->second->column) + return {node_name}; + if (it != node_name_to_node.end()) - return it->second; + { + /// There is a node with this name, but it doesn't have a column + /// This likely happens because we executed the query until WithMergeableState with a const node in the + /// WHERE clause. As the results of headers are materialized, the column was removed + /// Let's add a new column and keep this + String dupped_name{node_name + "_dupped"}; + const auto * node = &actions_dag.addColumn(column); + node_name_to_node[dupped_name] = node; + return dupped_name; + } const auto * node = &actions_dag.addColumn(column); node_name_to_node[node->result_name] = node; - return node; + return {node_name}; } template @@ -723,7 +735,7 @@ PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::vi column.type = constant_type; column.column = column.type->createColumnConst(1, constant_literal); - actions_stack[0].addConstantIfNecessary(constant_node_name, column); + String final_name = actions_stack[0].addConstantIfNecessary(constant_node_name, column); size_t actions_stack_size = actions_stack.size(); for (size_t i = 1; i < actions_stack_size; ++i) @@ -732,8 +744,7 @@ PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::vi actions_stack_node.addInputConstantColumnIfNecessary(constant_node_name, column); } - return {constant_node_name, Levels(0)}; - + return {final_name, Levels(0)}; } PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::visitLambda(const QueryTreeNodePtr & node) @@ -862,7 +873,7 @@ PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::ma else column.column = std::move(column_set); - actions_stack[0].addConstantIfNecessary(column.name, column); + String final_name = actions_stack[0].addConstantIfNecessary(column.name, column); size_t actions_stack_size = actions_stack.size(); for (size_t i = 1; i < actions_stack_size; ++i) @@ -871,7 +882,7 @@ PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::ma actions_stack_node.addInputConstantColumnIfNecessary(column.name, column); } - return {column.name, Levels(0)}; + return {final_name, Levels(0)}; } PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::visitIndexHintFunction(const QueryTreeNodePtr & node) From fed573ffee1bb57f0b9cff7f6f1c7d5a542af51c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Fri, 28 Jun 2024 17:12:10 +0200 Subject: [PATCH 04/23] Extend fix --- src/Planner/PlannerActionsVisitor.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Planner/PlannerActionsVisitor.cpp b/src/Planner/PlannerActionsVisitor.cpp index 13e96dc7016..1c9553032c2 100644 --- a/src/Planner/PlannerActionsVisitor.cpp +++ b/src/Planner/PlannerActionsVisitor.cpp @@ -535,7 +535,7 @@ public: } private: - std::unordered_map node_name_to_node; + std::unordered_map node_name_to_node; ActionsDAG & actions_dag; QueryTreeNodePtr scope_node; }; @@ -741,7 +741,7 @@ PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::vi for (size_t i = 1; i < actions_stack_size; ++i) { auto & actions_stack_node = actions_stack[i]; - actions_stack_node.addInputConstantColumnIfNecessary(constant_node_name, column); + actions_stack_node.addInputConstantColumnIfNecessary(final_name, column); } return {final_name, Levels(0)}; @@ -879,7 +879,7 @@ PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::ma for (size_t i = 1; i < actions_stack_size; ++i) { auto & actions_stack_node = actions_stack[i]; - actions_stack_node.addInputConstantColumnIfNecessary(column.name, column); + actions_stack_node.addInputConstantColumnIfNecessary(final_name, column); } return {final_name, Levels(0)}; From 74a5d56f1a404a7271b5a177e4902d0c319195d9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Mon, 1 Jul 2024 19:32:22 +0200 Subject: [PATCH 05/23] Fix interpolate and add tests --- src/Analyzer/Resolve/QueryAnalyzer.cpp | 4 +- src/Functions/IFunction.cpp | 6 ++ src/Planner/Planner.cpp | 7 +- src/Planner/PlannerActionsVisitor.cpp | 36 ++++++--- src/Planner/PlannerActionsVisitor.h | 9 ++- ..._no_aggregates_and_constant_keys.reference | 4 +- ...nality_group_by_distributed_plan.reference | 55 +++++++++++++ ..._cardinality_group_by_distributed_plan.sql | 80 +++++++++++++++++++ 8 files changed, 185 insertions(+), 16 deletions(-) create mode 100644 tests/queries/0_stateless/03174_low_cardinality_group_by_distributed_plan.reference create mode 100644 tests/queries/0_stateless/03174_low_cardinality_group_by_distributed_plan.sql diff --git a/src/Analyzer/Resolve/QueryAnalyzer.cpp b/src/Analyzer/Resolve/QueryAnalyzer.cpp index 8860050c5b9..165256479ce 100644 --- a/src/Analyzer/Resolve/QueryAnalyzer.cpp +++ b/src/Analyzer/Resolve/QueryAnalyzer.cpp @@ -4100,7 +4100,9 @@ void QueryAnalyzer::resolveInterpolateColumnsNodeList(QueryTreeNodePtr & interpo auto * column_to_interpolate = interpolate_node_typed.getExpression()->as(); if (!column_to_interpolate) - throw Exception(ErrorCodes::LOGICAL_ERROR, "INTERPOLATE can work only for indentifiers, but {} is found", + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "INTERPOLATE can work only for identifiers, but {} is found", interpolate_node_typed.getExpression()->formatASTForErrorMessage()); auto column_to_interpolate_name = column_to_interpolate->getIdentifier().getFullName(); diff --git a/src/Functions/IFunction.cpp b/src/Functions/IFunction.cpp index 76ae8f33fbd..8b092ba9b6e 100644 --- a/src/Functions/IFunction.cpp +++ b/src/Functions/IFunction.cpp @@ -80,8 +80,14 @@ ColumnPtr replaceLowCardinalityColumnsByNestedAndGetDictionaryIndexes( if (number_full_columns > 0 || number_low_cardinality_columns > 1) { + /// This should not be possible but currently there are multiple tests in CI failing because of it + /// TODO: Fix those cases, then enable this exception +#if 0 throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected low cardinality types found. Low cardinality: {}. Full {}. Const {}", number_low_cardinality_columns, number_full_columns, number_const_columns); +#else + return nullptr; +#endif } else if (number_low_cardinality_columns == 1) { diff --git a/src/Planner/Planner.cpp b/src/Planner/Planner.cpp index 2d42ed73223..93a4ea01ff0 100644 --- a/src/Planner/Planner.cpp +++ b/src/Planner/Planner.cpp @@ -742,7 +742,12 @@ void addWithFillStepIfNeeded(QueryPlan & query_plan, { auto & interpolate_node_typed = interpolate_node->as(); - PlannerActionsVisitor planner_actions_visitor(planner_context); + PlannerActionsVisitor planner_actions_visitor( + planner_context, + /* use_column_identifier_as_action_node_name_, (default value)*/ true, + /// Prefer the INPUT to CONSTANT nodes (actions must be non constant) + /* prefer_const_column_to_input */ false); + auto expression_to_interpolate_expression_nodes = planner_actions_visitor.visit(*interpolate_actions_dag, interpolate_node_typed.getExpression()); if (expression_to_interpolate_expression_nodes.size() != 1) diff --git a/src/Planner/PlannerActionsVisitor.cpp b/src/Planner/PlannerActionsVisitor.cpp index 1c9553032c2..a199420c9bd 100644 --- a/src/Planner/PlannerActionsVisitor.cpp +++ b/src/Planner/PlannerActionsVisitor.cpp @@ -485,19 +485,24 @@ public: return node; } - [[nodiscard]] String addConstantIfNecessary(const std::string & node_name, const ColumnWithTypeAndName & column) + [[nodiscard]] String + addConstantIfNecessary(const std::string & node_name, const ColumnWithTypeAndName & column, bool prefer_const_column_to_input) { + chassert(column.column != nullptr); auto it = node_name_to_node.find(node_name); - if (it != node_name_to_node.end() && it->second->column) + if (it != node_name_to_node.end() && (!prefer_const_column_to_input || it->second->column)) return {node_name}; if (it != node_name_to_node.end()) { /// There is a node with this name, but it doesn't have a column /// This likely happens because we executed the query until WithMergeableState with a const node in the - /// WHERE clause. As the results of headers are materialized, the column was removed + /// WHERE clause and, as the results of headers are materialized, the column was removed /// Let's add a new column and keep this String dupped_name{node_name + "_dupped"}; + if (node_name_to_node.find(dupped_name) != node_name_to_node.end()) + return dupped_name; + const auto * node = &actions_dag.addColumn(column); node_name_to_node[dupped_name] = node; return dupped_name; @@ -543,9 +548,11 @@ private: class PlannerActionsVisitorImpl { public: - PlannerActionsVisitorImpl(ActionsDAG & actions_dag, + PlannerActionsVisitorImpl( + ActionsDAG & actions_dag, const PlannerContextPtr & planner_context_, - bool use_column_identifier_as_action_node_name_); + bool use_column_identifier_as_action_node_name_, + bool prefer_const_column_to_input_); ActionsDAG::NodeRawConstPtrs visit(QueryTreeNodePtr expression_node); @@ -605,14 +612,18 @@ private: const PlannerContextPtr planner_context; ActionNodeNameHelper action_node_name_helper; bool use_column_identifier_as_action_node_name; + bool prefer_const_column_to_input; }; -PlannerActionsVisitorImpl::PlannerActionsVisitorImpl(ActionsDAG & actions_dag, +PlannerActionsVisitorImpl::PlannerActionsVisitorImpl( + ActionsDAG & actions_dag, const PlannerContextPtr & planner_context_, - bool use_column_identifier_as_action_node_name_) + bool use_column_identifier_as_action_node_name_, + bool prefer_const_column_to_input_) : planner_context(planner_context_) , action_node_name_helper(node_to_node_name, *planner_context, use_column_identifier_as_action_node_name_) , use_column_identifier_as_action_node_name(use_column_identifier_as_action_node_name_) + , prefer_const_column_to_input(prefer_const_column_to_input_) { actions_stack.emplace_back(actions_dag, nullptr); } @@ -735,7 +746,7 @@ PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::vi column.type = constant_type; column.column = column.type->createColumnConst(1, constant_literal); - String final_name = actions_stack[0].addConstantIfNecessary(constant_node_name, column); + String final_name = actions_stack[0].addConstantIfNecessary(constant_node_name, column, prefer_const_column_to_input); size_t actions_stack_size = actions_stack.size(); for (size_t i = 1; i < actions_stack_size; ++i) @@ -873,7 +884,7 @@ PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::ma else column.column = std::move(column_set); - String final_name = actions_stack[0].addConstantIfNecessary(column.name, column); + String final_name = actions_stack[0].addConstantIfNecessary(column.name, column, prefer_const_column_to_input); size_t actions_stack_size = actions_stack.size(); for (size_t i = 1; i < actions_stack_size; ++i) @@ -1019,14 +1030,17 @@ PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::vi } -PlannerActionsVisitor::PlannerActionsVisitor(const PlannerContextPtr & planner_context_, bool use_column_identifier_as_action_node_name_) +PlannerActionsVisitor::PlannerActionsVisitor( + const PlannerContextPtr & planner_context_, bool use_column_identifier_as_action_node_name_, bool prefer_const_column_to_input_) : planner_context(planner_context_) , use_column_identifier_as_action_node_name(use_column_identifier_as_action_node_name_) + , prefer_const_column_to_input(prefer_const_column_to_input_) {} ActionsDAG::NodeRawConstPtrs PlannerActionsVisitor::visit(ActionsDAG & actions_dag, QueryTreeNodePtr expression_node) { - PlannerActionsVisitorImpl actions_visitor_impl(actions_dag, planner_context, use_column_identifier_as_action_node_name); + PlannerActionsVisitorImpl actions_visitor_impl( + actions_dag, planner_context, use_column_identifier_as_action_node_name, prefer_const_column_to_input); return actions_visitor_impl.visit(expression_node); } diff --git a/src/Planner/PlannerActionsVisitor.h b/src/Planner/PlannerActionsVisitor.h index 6bb32047327..4bec2d2bb8a 100644 --- a/src/Planner/PlannerActionsVisitor.h +++ b/src/Planner/PlannerActionsVisitor.h @@ -27,11 +27,17 @@ using PlannerContextPtr = std::shared_ptr; * During actions build, there is special handling for following functions: * 1. Aggregate functions are added in actions dag as INPUT nodes. Aggregate functions arguments are not added. * 2. For function `in` and its variants, already collected sets from planner context are used. + * 3. When building actions that use CONSTANT nodes, by default we ignore pre-existing INPUTs if those don't have + * a column (a const column always has a column). This is for compatibility with previous headers. We disable this + * behaviour when we explicitly want to override CONSTANT nodes with the input (resolving InterpolateNode for example) */ class PlannerActionsVisitor { public: - explicit PlannerActionsVisitor(const PlannerContextPtr & planner_context_, bool use_column_identifier_as_action_node_name_ = true); + explicit PlannerActionsVisitor( + const PlannerContextPtr & planner_context_, + bool use_column_identifier_as_action_node_name_ = true, + bool prefer_const_column_to_input_ = true); /** Add actions necessary to calculate expression node into expression dag. * Necessary actions are not added in actions dag output. @@ -42,6 +48,7 @@ public: private: const PlannerContextPtr planner_context; bool use_column_identifier_as_action_node_name = true; + bool prefer_const_column_to_input = true; }; /** Calculate query tree expression node action dag name and add them into node to name map. diff --git a/tests/queries/0_stateless/00257_shard_no_aggregates_and_constant_keys.reference b/tests/queries/0_stateless/00257_shard_no_aggregates_and_constant_keys.reference index 63b8a9d14fc..fc77ed8a241 100644 --- a/tests/queries/0_stateless/00257_shard_no_aggregates_and_constant_keys.reference +++ b/tests/queries/0_stateless/00257_shard_no_aggregates_and_constant_keys.reference @@ -8,13 +8,13 @@ 40 41 -0 +41 2 42 2 42 43 -0 +43 11 11 diff --git a/tests/queries/0_stateless/03174_low_cardinality_group_by_distributed_plan.reference b/tests/queries/0_stateless/03174_low_cardinality_group_by_distributed_plan.reference new file mode 100644 index 00000000000..1508c24f410 --- /dev/null +++ b/tests/queries/0_stateless/03174_low_cardinality_group_by_distributed_plan.reference @@ -0,0 +1,55 @@ +-- { echoOn } +SELECT concatWithSeparator('.', toUInt128(6), '666' as b, materialize(toLowCardinality(8))) +FROM system.one +GROUP BY '666'; +6.666.8 +SELECT concatWithSeparator('.', toUInt128(6), '666' as b, materialize(toLowCardinality(8))) +FROM remote('127.0.0.{1,1}', 'system.one') +GROUP BY '666'; +6.666.8 +-- https://github.com/ClickHouse/ClickHouse/issues/63006 +SELECT + 6, + concat(' World', toUInt128(6), 6, 6, 6, toNullable(6), materialize(toLowCardinality(toNullable(toUInt128(6))))) AS a, + concat(concat(' World', 6, toLowCardinality(6), ' World', toUInt256(6), materialize(6), 6, toNullable(6), 6, 6, NULL, 6, 6), ' World', 6, 6, 6, 6, toUInt256(6), NULL, 6, 6) AS b +FROM system.one +GROUP BY toNullable(6) + WITH ROLLUP +WITH TOTALS; +6 World666666 \N +6 World666666 \N + +6 World666666 \N +SELECT + 6, + concat(' World', toUInt128(6), 6, 6, 6, toNullable(6), materialize(toLowCardinality(toNullable(toUInt128(6))))) AS a, + concat(concat(' World', 6, toLowCardinality(6), ' World', toUInt256(6), materialize(6), 6, toNullable(6), 6, 6, NULL, 6, 6), ' World', 6, 6, 6, 6, toUInt256(6), NULL, 6, 6) AS b +FROM remote('127.0.0.1') +GROUP BY toNullable(6) + WITH ROLLUP + WITH TOTALS; +6 World666666 \N +6 World666666 \N + +6 World666666 \N +-- { echoOn } +SELECT + '%', + tuple(concat('%', 1, toLowCardinality(toLowCardinality(toNullable(materialize(1)))), currentDatabase(), 101., toNullable(13), '%AS%id_02%', toNullable(toNullable(10)), toLowCardinality(toNullable(10)), 10, 10)), + (toDecimal128(99.67, 6), 36, 61, 14) +FROM dist_03174 +WHERE dummy IN (0, '255') +GROUP BY + toNullable(13), + (99.67, 61, toLowCardinality(14)); +% ('%11default10113%AS%id_02%10101010') (99.67,36,61,14) +-- { echoOn } +SELECT + 38, + concat(position(concat(concat(position(concat(toUInt256(3)), 'ca', 2), 3), NULLIF(1, materialize(toLowCardinality(1)))), toLowCardinality(toNullable('ca'))), concat(NULLIF(1, 1), concat(3), toNullable(3))) +FROM set_index_not__fuzz_0 +GROUP BY + toNullable(3), + concat(concat(CAST(NULL, 'Nullable(Int8)'), toNullable(3))) +FORMAT Null +SETTINGS max_threads = 1, allow_experimental_analyzer = 1, cluster_for_parallel_replicas = 'parallel_replicas', max_parallel_replicas = 3, allow_experimental_parallel_reading_from_replicas = 1, parallel_replicas_for_non_replicated_merge_tree = 1, max_threads = 1; diff --git a/tests/queries/0_stateless/03174_low_cardinality_group_by_distributed_plan.sql b/tests/queries/0_stateless/03174_low_cardinality_group_by_distributed_plan.sql new file mode 100644 index 00000000000..d397d30e285 --- /dev/null +++ b/tests/queries/0_stateless/03174_low_cardinality_group_by_distributed_plan.sql @@ -0,0 +1,80 @@ +-- There are various tests that check that group by keys don't propagate into functions replacing const arguments +-- by full (empty) columns + +DROP TABLE IF EXISTS dist_03174; +DROP TABLE IF EXISTS set_index_not__fuzz_0; + +-- https://github.com/ClickHouse/ClickHouse/issues/63006 + +SET allow_experimental_analyzer=1; + +-- { echoOn } +SELECT concatWithSeparator('.', toUInt128(6), '666' as b, materialize(toLowCardinality(8))) +FROM system.one +GROUP BY '666'; + +SELECT concatWithSeparator('.', toUInt128(6), '666' as b, materialize(toLowCardinality(8))) +FROM remote('127.0.0.{1,1}', 'system.one') +GROUP BY '666'; + +-- https://github.com/ClickHouse/ClickHouse/issues/63006 +SELECT + 6, + concat(' World', toUInt128(6), 6, 6, 6, toNullable(6), materialize(toLowCardinality(toNullable(toUInt128(6))))) AS a, + concat(concat(' World', 6, toLowCardinality(6), ' World', toUInt256(6), materialize(6), 6, toNullable(6), 6, 6, NULL, 6, 6), ' World', 6, 6, 6, 6, toUInt256(6), NULL, 6, 6) AS b +FROM system.one +GROUP BY toNullable(6) + WITH ROLLUP +WITH TOTALS; + +SELECT + 6, + concat(' World', toUInt128(6), 6, 6, 6, toNullable(6), materialize(toLowCardinality(toNullable(toUInt128(6))))) AS a, + concat(concat(' World', 6, toLowCardinality(6), ' World', toUInt256(6), materialize(6), 6, toNullable(6), 6, 6, NULL, 6, 6), ' World', 6, 6, 6, 6, toUInt256(6), NULL, 6, 6) AS b +FROM remote('127.0.0.1') +GROUP BY toNullable(6) + WITH ROLLUP + WITH TOTALS; + +-- https://github.com/ClickHouse/ClickHouse/issues/64945 +-- { echoOff } +CREATE TABLE dist_03174 AS system.one ENGINE = Distributed(test_cluster_two_shards, system, one, dummy); + +-- { echoOn } +SELECT + '%', + tuple(concat('%', 1, toLowCardinality(toLowCardinality(toNullable(materialize(1)))), currentDatabase(), 101., toNullable(13), '%AS%id_02%', toNullable(toNullable(10)), toLowCardinality(toNullable(10)), 10, 10)), + (toDecimal128(99.67, 6), 36, 61, 14) +FROM dist_03174 +WHERE dummy IN (0, '255') +GROUP BY + toNullable(13), + (99.67, 61, toLowCardinality(14)); + +-- Parallel replicas +-- { echoOff } +CREATE TABLE set_index_not__fuzz_0 +( + `name` String, + `status` Enum8('alive' = 0, 'rip' = 1), + INDEX idx_status status TYPE set(2) GRANULARITY 1 +) +ENGINE = MergeTree() +ORDER BY name; + +INSERT INTO set_index_not__fuzz_0 SELECT * FROM generateRandom() LIMIT 10; + +-- { echoOn } +SELECT + 38, + concat(position(concat(concat(position(concat(toUInt256(3)), 'ca', 2), 3), NULLIF(1, materialize(toLowCardinality(1)))), toLowCardinality(toNullable('ca'))), concat(NULLIF(1, 1), concat(3), toNullable(3))) +FROM set_index_not__fuzz_0 +GROUP BY + toNullable(3), + concat(concat(CAST(NULL, 'Nullable(Int8)'), toNullable(3))) +FORMAT Null +SETTINGS max_threads = 1, allow_experimental_analyzer = 1, cluster_for_parallel_replicas = 'parallel_replicas', max_parallel_replicas = 3, allow_experimental_parallel_reading_from_replicas = 1, parallel_replicas_for_non_replicated_merge_tree = 1, max_threads = 1; + +-- { echoOff } +DROP TABLE IF EXISTS dist_03174; +DROP TABLE IF EXISTS set_index_not__fuzz_0; From 0e559ff7b94c55fe35d0db7174e523180f46e998 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Tue, 2 Jul 2024 11:50:43 +0200 Subject: [PATCH 06/23] Better name for flag --- src/Planner/Planner.cpp | 2 +- src/Planner/PlannerActionsVisitor.cpp | 26 ++++++++++++++------------ src/Planner/PlannerActionsVisitor.h | 4 ++-- 3 files changed, 17 insertions(+), 15 deletions(-) diff --git a/src/Planner/Planner.cpp b/src/Planner/Planner.cpp index 93a4ea01ff0..260fbabf26a 100644 --- a/src/Planner/Planner.cpp +++ b/src/Planner/Planner.cpp @@ -746,7 +746,7 @@ void addWithFillStepIfNeeded(QueryPlan & query_plan, planner_context, /* use_column_identifier_as_action_node_name_, (default value)*/ true, /// Prefer the INPUT to CONSTANT nodes (actions must be non constant) - /* prefer_const_column_to_input */ false); + /* always_use_const_column_for_constant_nodes */ false); auto expression_to_interpolate_expression_nodes = planner_actions_visitor.visit(*interpolate_actions_dag, interpolate_node_typed.getExpression()); diff --git a/src/Planner/PlannerActionsVisitor.cpp b/src/Planner/PlannerActionsVisitor.cpp index a199420c9bd..e9e1f4edcc2 100644 --- a/src/Planner/PlannerActionsVisitor.cpp +++ b/src/Planner/PlannerActionsVisitor.cpp @@ -485,12 +485,12 @@ public: return node; } - [[nodiscard]] String - addConstantIfNecessary(const std::string & node_name, const ColumnWithTypeAndName & column, bool prefer_const_column_to_input) + [[nodiscard]] String addConstantIfNecessary( + const std::string & node_name, const ColumnWithTypeAndName & column, bool always_use_const_column_for_constant_nodes) { chassert(column.column != nullptr); auto it = node_name_to_node.find(node_name); - if (it != node_name_to_node.end() && (!prefer_const_column_to_input || it->second->column)) + if (it != node_name_to_node.end() && (!always_use_const_column_for_constant_nodes || it->second->column)) return {node_name}; if (it != node_name_to_node.end()) @@ -552,7 +552,7 @@ public: ActionsDAG & actions_dag, const PlannerContextPtr & planner_context_, bool use_column_identifier_as_action_node_name_, - bool prefer_const_column_to_input_); + bool always_use_const_column_for_constant_nodes_); ActionsDAG::NodeRawConstPtrs visit(QueryTreeNodePtr expression_node); @@ -612,18 +612,18 @@ private: const PlannerContextPtr planner_context; ActionNodeNameHelper action_node_name_helper; bool use_column_identifier_as_action_node_name; - bool prefer_const_column_to_input; + bool always_use_const_column_for_constant_nodes; }; PlannerActionsVisitorImpl::PlannerActionsVisitorImpl( ActionsDAG & actions_dag, const PlannerContextPtr & planner_context_, bool use_column_identifier_as_action_node_name_, - bool prefer_const_column_to_input_) + bool always_use_const_column_for_constant_nodes_) : planner_context(planner_context_) , action_node_name_helper(node_to_node_name, *planner_context, use_column_identifier_as_action_node_name_) , use_column_identifier_as_action_node_name(use_column_identifier_as_action_node_name_) - , prefer_const_column_to_input(prefer_const_column_to_input_) + , always_use_const_column_for_constant_nodes(always_use_const_column_for_constant_nodes_) { actions_stack.emplace_back(actions_dag, nullptr); } @@ -746,7 +746,7 @@ PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::vi column.type = constant_type; column.column = column.type->createColumnConst(1, constant_literal); - String final_name = actions_stack[0].addConstantIfNecessary(constant_node_name, column, prefer_const_column_to_input); + String final_name = actions_stack[0].addConstantIfNecessary(constant_node_name, column, always_use_const_column_for_constant_nodes); size_t actions_stack_size = actions_stack.size(); for (size_t i = 1; i < actions_stack_size; ++i) @@ -884,7 +884,7 @@ PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::ma else column.column = std::move(column_set); - String final_name = actions_stack[0].addConstantIfNecessary(column.name, column, prefer_const_column_to_input); + String final_name = actions_stack[0].addConstantIfNecessary(column.name, column, always_use_const_column_for_constant_nodes); size_t actions_stack_size = actions_stack.size(); for (size_t i = 1; i < actions_stack_size; ++i) @@ -1031,16 +1031,18 @@ PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::vi } PlannerActionsVisitor::PlannerActionsVisitor( - const PlannerContextPtr & planner_context_, bool use_column_identifier_as_action_node_name_, bool prefer_const_column_to_input_) + const PlannerContextPtr & planner_context_, + bool use_column_identifier_as_action_node_name_, + bool always_use_const_column_for_constant_nodes_) : planner_context(planner_context_) , use_column_identifier_as_action_node_name(use_column_identifier_as_action_node_name_) - , prefer_const_column_to_input(prefer_const_column_to_input_) + , always_use_const_column_for_constant_nodes(always_use_const_column_for_constant_nodes_) {} ActionsDAG::NodeRawConstPtrs PlannerActionsVisitor::visit(ActionsDAG & actions_dag, QueryTreeNodePtr expression_node) { PlannerActionsVisitorImpl actions_visitor_impl( - actions_dag, planner_context, use_column_identifier_as_action_node_name, prefer_const_column_to_input); + actions_dag, planner_context, use_column_identifier_as_action_node_name, always_use_const_column_for_constant_nodes); return actions_visitor_impl.visit(expression_node); } diff --git a/src/Planner/PlannerActionsVisitor.h b/src/Planner/PlannerActionsVisitor.h index 4bec2d2bb8a..1dbd149bc4b 100644 --- a/src/Planner/PlannerActionsVisitor.h +++ b/src/Planner/PlannerActionsVisitor.h @@ -37,7 +37,7 @@ public: explicit PlannerActionsVisitor( const PlannerContextPtr & planner_context_, bool use_column_identifier_as_action_node_name_ = true, - bool prefer_const_column_to_input_ = true); + bool always_use_const_column_for_constant_nodes_ = true); /** Add actions necessary to calculate expression node into expression dag. * Necessary actions are not added in actions dag output. @@ -48,7 +48,7 @@ public: private: const PlannerContextPtr planner_context; bool use_column_identifier_as_action_node_name = true; - bool prefer_const_column_to_input = true; + bool always_use_const_column_for_constant_nodes = true; }; /** Calculate query tree expression node action dag name and add them into node to name map. From 2bc65fe2080260d16e27df965610197a38052705 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 16 Jul 2024 09:44:05 +0000 Subject: [PATCH 07/23] Make ColumnSparse::updateWeakHash32 consistent with internal column. --- src/Columns/ColumnSparse.cpp | 38 +++++++++++++++++++++++++++++++++--- src/Columns/ColumnSparse.h | 3 +++ 2 files changed, 38 insertions(+), 3 deletions(-) diff --git a/src/Columns/ColumnSparse.cpp b/src/Columns/ColumnSparse.cpp index 809586d8810..ea4d23c1678 100644 --- a/src/Columns/ColumnSparse.cpp +++ b/src/Columns/ColumnSparse.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -684,13 +685,26 @@ void ColumnSparse::updateWeakHash32(WeakHash32 & hash) const throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of WeakHash32 does not match size of column: " "column size is {}, hash size is {}", _size, hash.getData().size()); - auto offset_it = begin(); + size_t values_size = values->size(); + WeakHash32 values_hash(values_size); + auto & hash_data = hash.getData(); + auto & values_hash_data = values_hash.getData(); + const auto & offsets_data = getOffsetsData(); + + if (getNumberOfDefaultRows() > 0) + values_hash_data[0] = hash_data[getFirstDefaultValueIndex()]; + + for (size_t i = 0; i < values_size; ++i) + values_hash_data[i + 1] = hash_data[offsets_data[i]]; + + values->updateWeakHash32(values_hash); + + auto offset_it = begin(); for (size_t i = 0; i < _size; ++i, ++offset_it) { size_t value_index = offset_it.getValueIndex(); - auto data_ref = values->getDataAt(value_index); - hash_data[i] = ::updateWeakHash32(reinterpret_cast(data_ref.data), data_ref.size, hash_data[i]); + hash_data[i] = values_hash_data[value_index]; } } @@ -807,6 +821,24 @@ size_t ColumnSparse::getValueIndex(size_t n) const return it - offsets_data.begin() + 1; } +size_t ColumnSparse::getFirstDefaultValueIndex() const +{ + if (getNumberOfDefaultRows() == 0) + return size(); + + const auto & offsets_data = getOffsetsData(); + size_t off_size = offsets_data.size(); + + if (off_size == 0 || offsets_data[0] > 0) + return 0; + + size_t idx = 0; + while (idx + 1 < off_size && offsets_data[idx] + 1 == offsets_data[idx + 1]) + ++idx; + + return offsets_data[idx] + 1; +} + ColumnSparse::Iterator ColumnSparse::getIterator(size_t n) const { const auto & offsets_data = getOffsetsData(); diff --git a/src/Columns/ColumnSparse.h b/src/Columns/ColumnSparse.h index 3e34d1de94a..4860f5171f7 100644 --- a/src/Columns/ColumnSparse.h +++ b/src/Columns/ColumnSparse.h @@ -173,6 +173,9 @@ public: /// O(log(offsets.size())) complexity, size_t getValueIndex(size_t n) const; + /// Returns an index of the first default value, or size() if there is no defaults. + size_t getFirstDefaultValueIndex() const; + const IColumn & getValuesColumn() const { return *values; } IColumn & getValuesColumn() { return *values; } From 24b9e1885216a62294e68a977437b4d4c62a12f9 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 16 Jul 2024 09:50:27 +0000 Subject: [PATCH 08/23] Add a test. --- ...l_window_finctions_and_column_sparce_bug.reference | 8 ++++++++ ...arallel_window_finctions_and_column_sparce_bug.sql | 11 +++++++++++ 2 files changed, 19 insertions(+) create mode 100644 tests/queries/0_stateless/03205_parallel_window_finctions_and_column_sparce_bug.reference create mode 100644 tests/queries/0_stateless/03205_parallel_window_finctions_and_column_sparce_bug.sql diff --git a/tests/queries/0_stateless/03205_parallel_window_finctions_and_column_sparce_bug.reference b/tests/queries/0_stateless/03205_parallel_window_finctions_and_column_sparce_bug.reference new file mode 100644 index 00000000000..f11ec57a425 --- /dev/null +++ b/tests/queries/0_stateless/03205_parallel_window_finctions_and_column_sparce_bug.reference @@ -0,0 +1,8 @@ +false 1 1 +true 1 1 +--- +false 1 1 +false 1 2 +false 1 3 +true 1 1 +true 1 2 diff --git a/tests/queries/0_stateless/03205_parallel_window_finctions_and_column_sparce_bug.sql b/tests/queries/0_stateless/03205_parallel_window_finctions_and_column_sparce_bug.sql new file mode 100644 index 00000000000..a4c0200813c --- /dev/null +++ b/tests/queries/0_stateless/03205_parallel_window_finctions_and_column_sparce_bug.sql @@ -0,0 +1,11 @@ +create table t(c Int32, d Bool) Engine=MergeTree order by c; +system stop merges t; + +insert into t values (1, 0); +insert into t values (1, 0); +insert into t values (1, 1); +insert into t values (1, 0)(1, 1); + +SELECT d, c, row_number() over (partition by d order by c) as c8 FROM t qualify c8=1 order by d settings max_threads=2; +SELECT '---'; +SELECT d, c, row_number() over (partition by d order by c) as c8 FROM t order by d, c8 settings max_threads=2; From 902e548a2daf087bdb4363694a91bf665a7f7f9b Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 16 Jul 2024 09:52:00 +0000 Subject: [PATCH 09/23] Fix typo. --- ...205_parallel_window_finctions_and_column_sparse_bug.reference} | 0 ... => 03205_parallel_window_finctions_and_column_sparse_bug.sql} | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename tests/queries/0_stateless/{03205_parallel_window_finctions_and_column_sparce_bug.reference => 03205_parallel_window_finctions_and_column_sparse_bug.reference} (100%) rename tests/queries/0_stateless/{03205_parallel_window_finctions_and_column_sparce_bug.sql => 03205_parallel_window_finctions_and_column_sparse_bug.sql} (100%) diff --git a/tests/queries/0_stateless/03205_parallel_window_finctions_and_column_sparce_bug.reference b/tests/queries/0_stateless/03205_parallel_window_finctions_and_column_sparse_bug.reference similarity index 100% rename from tests/queries/0_stateless/03205_parallel_window_finctions_and_column_sparce_bug.reference rename to tests/queries/0_stateless/03205_parallel_window_finctions_and_column_sparse_bug.reference diff --git a/tests/queries/0_stateless/03205_parallel_window_finctions_and_column_sparce_bug.sql b/tests/queries/0_stateless/03205_parallel_window_finctions_and_column_sparse_bug.sql similarity index 100% rename from tests/queries/0_stateless/03205_parallel_window_finctions_and_column_sparce_bug.sql rename to tests/queries/0_stateless/03205_parallel_window_finctions_and_column_sparse_bug.sql From 04f3c29b60658938a93aa6de6f15f8c50121e53e Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 16 Jul 2024 13:47:38 +0000 Subject: [PATCH 10/23] Fix crash. --- src/Columns/ColumnSparse.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Columns/ColumnSparse.cpp b/src/Columns/ColumnSparse.cpp index ea4d23c1678..0922eb5ea2d 100644 --- a/src/Columns/ColumnSparse.cpp +++ b/src/Columns/ColumnSparse.cpp @@ -695,7 +695,7 @@ void ColumnSparse::updateWeakHash32(WeakHash32 & hash) const if (getNumberOfDefaultRows() > 0) values_hash_data[0] = hash_data[getFirstDefaultValueIndex()]; - for (size_t i = 0; i < values_size; ++i) + for (size_t i = 0; i + 1 < values_size; ++i) values_hash_data[i + 1] = hash_data[offsets_data[i]]; values->updateWeakHash32(values_hash); From 8acc5d90ca295c2ff6b574da15dec70268a19015 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 16 Jul 2024 13:47:58 +0000 Subject: [PATCH 11/23] Add more test cases. --- ..._finctions_and_column_sparse_bug.reference | 75 ++++++++++++++++++ ...window_finctions_and_column_sparse_bug.sql | 76 +++++++++++++++++++ 2 files changed, 151 insertions(+) diff --git a/tests/queries/0_stateless/03205_parallel_window_finctions_and_column_sparse_bug.reference b/tests/queries/0_stateless/03205_parallel_window_finctions_and_column_sparse_bug.reference index f11ec57a425..356329a392d 100644 --- a/tests/queries/0_stateless/03205_parallel_window_finctions_and_column_sparse_bug.reference +++ b/tests/queries/0_stateless/03205_parallel_window_finctions_and_column_sparse_bug.reference @@ -6,3 +6,78 @@ false 1 2 false 1 3 true 1 1 true 1 2 +--- +-755809149 0 +--- +1 -2081147898 +1 -1981899149 +2 -2051538534 +2 -1650266905 +3 -1975508531 +3 -1646738223 +4 -1700730666 +4 -1618912877 +5 -1465484835 +5 -1317193174 +6 -1458338029 +6 -1219769753 +7 -1450619195 +7 -1154269118 +8 -1365934326 +8 -1150980622 +9 -1203382363 +9 -1098155311 +10 -1197430632 +10 -841067875 +11 -1176267855 +11 -816935497 +12 -1020892864 +12 -599948807 +13 -991301833 +13 -526570556 +14 -685902265 +14 -504713125 +15 -653505826 +15 -411038390 +16 -451392958 +16 -331834394 +17 -262516786 +17 -176934810 +18 -222873194 +18 -2 +19 -153185515 +19 6 +20 -74234560 +20 255 +21 -41 +21 406615258 +22 -6 +22 541758331 +23 -5 +23 720985423 +24 -3 +24 745669725 +25 15 +25 897064234 +26 65535 +26 1116921321 +27 77089559 +27 1207796283 +28 100663045 +28 1603772265 +29 561061873 +29 1664059402 +30 643897141 +30 1688303275 +31 914629990 +31 1913361922 +32 1159852204 +32 1929066636 +33 1258218855 +33 1968095908 +34 1459407556 +34 2054878592 +35 1936334332 +35 2125075305 +36 1962246186 +37 2030467062 diff --git a/tests/queries/0_stateless/03205_parallel_window_finctions_and_column_sparse_bug.sql b/tests/queries/0_stateless/03205_parallel_window_finctions_and_column_sparse_bug.sql index a4c0200813c..6e326d0a67f 100644 --- a/tests/queries/0_stateless/03205_parallel_window_finctions_and_column_sparse_bug.sql +++ b/tests/queries/0_stateless/03205_parallel_window_finctions_and_column_sparse_bug.sql @@ -9,3 +9,79 @@ insert into t values (1, 0)(1, 1); SELECT d, c, row_number() over (partition by d order by c) as c8 FROM t qualify c8=1 order by d settings max_threads=2; SELECT '---'; SELECT d, c, row_number() over (partition by d order by c) as c8 FROM t order by d, c8 settings max_threads=2; +SELECT '---'; + +drop table t; + +create table t ( + c Int32 primary key , + s Bool , + w Float64 + ); + +system stop merges t; + +insert into t values(439499072,true,0),(1393290072,true,0); +insert into t values(-1317193174,false,0),(1929066636,false,0); +insert into t values(-2,false,0),(1962246186,true,0),(2054878592,false,0); +insert into t values(-1893563136,true,41.55); +insert into t values(-1338380855,true,-0.7),(-991301833,true,0),(-755809149,false,43.18),(-41,true,0),(3,false,0),(255,false,0),(255,false,0),(189195893,false,0),(195550885,false,9223372036854776000); + +SELECT * FROM ( +SELECT c, min(w) OVER (PARTITION BY s ORDER BY c ASC, s ASC, w ASC) +FROM t limit toUInt64(-1)) +WHERE c = -755809149; + +SELECT '---'; + +create table t_vkx4cc ( + c_ylzjpt Int32, + c_hqfr9 Bool , + ) engine = MergeTree order by c_ylzjpt; + +system stop merges t_vkx4cc; + +insert into t_vkx4cc (c_ylzjpt, c_hqfr9) values (-2081147898, coalesce((NOT NOT(cast( (53 < 539704722) as Nullable(Bool)))), true)), (-1219769753, coalesce((true) and (false), false)), (-1981899149, coalesce(false, false)), (-1646738223, coalesce((NOT NOT(cast( (23.5 <= -26) as Nullable(Bool)))), false)); + +insert into t_vkx4cc (c_ylzjpt, c_hqfr9) values (255, coalesce(false, false)), (-1317193174, coalesce(false, false)), (-41, coalesce(true, false)), (1929066636, coalesce(false, true)); + +insert into t_vkx4cc (c_ylzjpt, c_hqfr9) values (-1700730666, coalesce((NOT NOT(cast( (-2022515471055597472 AND -29) as Nullable(Bool)))), false)), (1664059402, coalesce((NOT NOT(cast( (-19643 >= -122) as Nullable(Bool)))), false)), (1688303275, coalesce((NOT NOT(cast( (737275892 < 105) as Nullable(Bool)))), true)), (406615258, coalesce((NOT NOT(cast( (-657730213 = 82.86) as Nullable(Bool)))), false)); + +insert into t_vkx4cc (c_ylzjpt, c_hqfr9) values (-2, coalesce(false, false)), (1962246186, coalesce(true, false)), (-991301833, coalesce(true, true)), (2054878592, coalesce(false, false)); + +insert into t_vkx4cc (c_ylzjpt, c_hqfr9) values (643897141, coalesce((NOT NOT(cast( (-60 AND cast(null as Nullable(Int64))) as Nullable(Bool)))), true)), (-2051538534, coalesce(((-1616816511 between 332225780 and -1883087387)) or ((-573375170 between -1427445977 and 615586748)), false)), (77089559, coalesce((NOT NOT(cast( ((true) and (true) != 925456787) as Nullable(Bool)))), false)), (1116921321, coalesce((0 is NULL), true)); + +insert into t_vkx4cc (c_ylzjpt, c_hqfr9) values (-816935497, coalesce(false, false)), (1207796283, coalesce((-129 between -5 and -5), false)), (-1365934326, coalesce(true, false)), (-1618912877, coalesce((NOT NOT(cast( (false >= 31833) as Nullable(Bool)))), false)); + +insert into t_vkx4cc (c_ylzjpt, c_hqfr9) values (-331834394, coalesce((NOT NOT(cast( (-63 <= -1822810052) as Nullable(Bool)))), true)), (-1020892864, coalesce((NOT NOT(cast( (40.31 <= 8146037365746019777) as Nullable(Bool)))), true)), (-1150980622, coalesce(((94019304 between -730556489 and 32)) and ((-956354236 is not NULL)), true)), (-1203382363, coalesce(true, true)); + +insert into t_vkx4cc (c_ylzjpt, c_hqfr9) values (-653505826, coalesce((true) or (true), false)), (-1975508531, coalesce(((-796885845 between 65536 and cast(null as Nullable(Int32)))) or ((NOT NOT(cast( (-7467729336434250795 < 100.20) as Nullable(Bool))))), false)), (-1465484835, coalesce(((NOT NOT(cast( (19209 <= 75.96) as Nullable(Bool))))) or (true), false)), (1968095908, coalesce((NOT NOT(cast( (-1309960412156062327 > 13102) as Nullable(Bool)))), true)); + +alter table t_vkx4cc add column c_zosphq2t1 Float64; + +insert into t_vkx4cc (c_ylzjpt, c_hqfr9, c_zosphq2t1) values (-153185515, coalesce((NOT NOT(cast( (1291639145 >= 30.22) as Nullable(Bool)))), false), -1.8), (-411038390, coalesce(((-762326135 between 16 and 177530758)) or (false), true), 26.34), (914629990, coalesce((-1125832977 is not NULL), true), 59.2), (541758331, coalesce(false, true), -255.1); + +insert into t_vkx4cc (c_ylzjpt, c_hqfr9, c_zosphq2t1) values (2125075305, coalesce(false, false), 55.36), (-1176267855, coalesce(true, true), 55.45), (1459407556, coalesce((true) and ((NOT NOT(cast( (95.96 != 65) as Nullable(Bool))))), true), 85.80), (-1098155311, coalesce(false, false), 2147483649.9); + +insert into t_vkx4cc (c_ylzjpt, c_hqfr9, c_zosphq2t1) values (6, coalesce((NOT NOT(cast( (1546334968 < -4) as Nullable(Bool)))), true), 57.42), (-5, coalesce((NOT NOT(cast( (59 AND 13) as Nullable(Bool)))), false), 65536.3), (100663045, coalesce((-1190355242 is not NULL), true), 73.80), (-451392958, coalesce((NOT NOT(cast( (false != -443845933) as Nullable(Bool)))), false), -4294967294.0); + +insert into t_vkx4cc (c_ylzjpt, c_hqfr9, c_zosphq2t1) values (561061873, coalesce(true, false), 12.17), (-526570556, coalesce(false, false), 64.73), (-1450619195, coalesce(true, true), 54.33), (-3, coalesce(true, true), 52.9); + +insert into t_vkx4cc (c_ylzjpt, c_hqfr9, c_zosphq2t1) values (-504713125, coalesce(false, true), 27.58), (897064234, coalesce((836516994 between cast(null as Nullable(Int32)) and -1832647080), true), 9223372036854775809.2), (65535, coalesce(true, true), 4294967297.5), (-599948807, coalesce((false) or ((NOT NOT(cast( (6.52 = 65.49) as Nullable(Bool))))), false), 256.5); + +insert into t_vkx4cc (c_ylzjpt, c_hqfr9, c_zosphq2t1) values (-1650266905, coalesce((NOT NOT(cast( (-83 = -218055084) as Nullable(Bool)))), true), 1.9), (-841067875, coalesce(false, true), -126.5), (15, coalesce(((NOT NOT(cast( (cast(null as Nullable(Decimal)) = cast(null as Nullable(Int32))) as Nullable(Bool))))) or (true), true), 33.65), (1913361922, coalesce((NOT NOT(cast( (false AND 0) as Nullable(Bool)))), false), 6.4); + +insert into t_vkx4cc (c_ylzjpt, c_hqfr9, c_zosphq2t1) values (1159852204, coalesce((-2057115045 is not NULL), false), 20.61), (-6, coalesce(true, true), 66.33), (-1154269118, coalesce(false, true), 8.89), (1258218855, coalesce(true, false), 19.80); + +insert into t_vkx4cc (c_ylzjpt, c_hqfr9, c_zosphq2t1) values (1603772265, coalesce(false, true), 57.87), (-176934810, coalesce(false, true), 128.8), (-1458338029, coalesce((NOT NOT(cast( (20908 != (NOT NOT(cast( (cast(null as Nullable(Decimal)) <= (true) or ((NOT NOT(cast( (973511022 <= -112) as Nullable(Bool)))))) as Nullable(Bool))))) as Nullable(Bool)))), true), 76.54), (-262516786, coalesce((cast(null as Nullable(Int32)) is NULL), false), 21.49); + +insert into t_vkx4cc (c_ylzjpt, c_hqfr9, c_zosphq2t1) values (-1197430632, coalesce(true, false), 45.40), (-685902265, coalesce((NOT NOT(cast( (cast(null as Nullable(Decimal)) < cast(null as Nullable(Decimal))) as Nullable(Bool)))), true), 5.55), (1936334332, coalesce((-1565552735 is not NULL), false), 26.28), (2030467062, coalesce((NOT NOT(cast( (127.3 != cast(null as Nullable(Int32))) as Nullable(Bool)))), true), 89.50); + +insert into t_vkx4cc (c_ylzjpt, c_hqfr9, c_zosphq2t1) values (720985423, coalesce((NOT NOT(cast( (-451448940 = cast(null as Nullable(Decimal))) as Nullable(Bool)))), false), 52.65), (-222873194, coalesce(((-20 between -1419620477 and 1616455043)) or ((25624502 between 1312431316 and 1757361651)), false), 127.2), (745669725, coalesce((NOT NOT(cast( ((NOT NOT(cast( (cast(null as Nullable(UInt64)) <= 42) as Nullable(Bool)))) >= 3233811255032796928) as Nullable(Bool)))), false), 7.74), (-74234560, coalesce((NOT NOT(cast( (cast(null as Nullable(Decimal)) >= cast(null as Nullable(Decimal))) as Nullable(Bool)))), true), 19.25); + +SELECT DISTINCT + count(ref_0.c_zosphq2t1) over (partition by ref_0.c_hqfr9 order by ref_0.c_ylzjpt, ref_0.c_hqfr9, ref_0.c_zosphq2t1) as c0, + ref_0.c_ylzjpt as c1 +FROM + t_vkx4cc as ref_0 + order by c0, c1; From 4dea89df763fb9504cb681379d46830f4ec98db3 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 16 Jul 2024 13:50:58 +0000 Subject: [PATCH 12/23] Cleanup. --- src/Columns/ColumnSparse.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Columns/ColumnSparse.cpp b/src/Columns/ColumnSparse.cpp index 0922eb5ea2d..98a66e87387 100644 --- a/src/Columns/ColumnSparse.cpp +++ b/src/Columns/ColumnSparse.cpp @@ -4,7 +4,6 @@ #include #include #include -#include #include #include #include From 4a69bd78819c1bb97988fce96d17be99bbffe00e Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 17 Jul 2024 07:37:45 +0200 Subject: [PATCH 13/23] Fix terrible test @arthurpassos --- ...tiple_batches_array_inconsistent_offsets.sh | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/tests/queries/0_stateless/02874_parquet_multiple_batches_array_inconsistent_offsets.sh b/tests/queries/0_stateless/02874_parquet_multiple_batches_array_inconsistent_offsets.sh index 83196458a84..c96531ffea9 100755 --- a/tests/queries/0_stateless/02874_parquet_multiple_batches_array_inconsistent_offsets.sh +++ b/tests/queries/0_stateless/02874_parquet_multiple_batches_array_inconsistent_offsets.sh @@ -1,5 +1,6 @@ #!/usr/bin/env bash -# Tags: no-ubsan, no-fasttest +# Tags: long, no-ubsan, no-fasttest, no-parallel, no-asan, no-msan, no-tsan +# This test requires around 10 GB of memory and it is just too much. CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh @@ -121,9 +122,12 @@ echo "Parquet" #} DATA_FILE=$CUR_DIR/data_parquet/string_int_list_inconsistent_offset_multiple_batches.parquet -${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS parquet_load" -${CLICKHOUSE_CLIENT} --query="CREATE TABLE parquet_load (ints Array(Int64), strings Nullable(String)) ENGINE = Memory" -cat "$DATA_FILE" | ${CLICKHOUSE_CLIENT} -q "INSERT INTO parquet_load FORMAT Parquet" -${CLICKHOUSE_CLIENT} --query="SELECT * FROM parquet_load" | md5sum -${CLICKHOUSE_CLIENT} --query="SELECT count() FROM parquet_load" -${CLICKHOUSE_CLIENT} --query="drop table parquet_load" \ No newline at end of file + +${CLICKHOUSE_LOCAL} --multiquery " +DROP TABLE IF EXISTS parquet_load; +CREATE TABLE parquet_load (ints Array(Int64), strings Nullable(String)) ENGINE = Memory; +INSERT INTO parquet_load FROM INFILE '$DATA_FILE'; +SELECT sum(cityHash64(*)) FROM parquet_load; +SELECT count() FROM parquet_load; +DROP TABLE parquet_load; +" From b8d6c68f5fdc691f18000878203971689d2f812c Mon Sep 17 00:00:00 2001 From: vdimir Date: Wed, 17 Jul 2024 10:30:03 +0200 Subject: [PATCH 14/23] Update 02874_parquet_multiple_batches_array_inconsistent_offsets.reference --- ...arquet_multiple_batches_array_inconsistent_offsets.reference | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02874_parquet_multiple_batches_array_inconsistent_offsets.reference b/tests/queries/0_stateless/02874_parquet_multiple_batches_array_inconsistent_offsets.reference index ba63f2f7e9c..a9eae234dba 100644 --- a/tests/queries/0_stateless/02874_parquet_multiple_batches_array_inconsistent_offsets.reference +++ b/tests/queries/0_stateless/02874_parquet_multiple_batches_array_inconsistent_offsets.reference @@ -1,3 +1,3 @@ Parquet -e76a749f346078a6a43e0cbd25f0d18a - +3249508141921544766 400 From baade8baf45c7919f106160b8b8633d2c59e3ae8 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Wed, 17 Jul 2024 13:22:50 +0000 Subject: [PATCH 15/23] Replace updateWeakHash to getWeakHash --- src/Columns/ColumnAggregateFunction.cpp | 9 +-- src/Columns/ColumnAggregateFunction.h | 2 +- src/Columns/ColumnArray.cpp | 11 ++- src/Columns/ColumnArray.h | 2 +- src/Columns/ColumnCompressed.h | 3 +- src/Columns/ColumnConst.cpp | 14 +--- src/Columns/ColumnConst.h | 2 +- src/Columns/ColumnDecimal.cpp | 9 +-- src/Columns/ColumnDecimal.h | 2 +- src/Columns/ColumnDynamic.h | 5 +- src/Columns/ColumnFixedString.cpp | 10 +-- src/Columns/ColumnFixedString.h | 2 +- src/Columns/ColumnFunction.h | 5 +- src/Columns/ColumnLowCardinality.cpp | 26 +++--- src/Columns/ColumnLowCardinality.h | 4 +- src/Columns/ColumnMap.cpp | 4 +- src/Columns/ColumnMap.h | 2 +- src/Columns/ColumnNullable.cpp | 16 ++-- src/Columns/ColumnNullable.h | 2 +- src/Columns/ColumnObject.h | 3 +- src/Columns/ColumnSparse.cpp | 21 ++--- src/Columns/ColumnSparse.h | 2 +- src/Columns/ColumnString.cpp | 9 +-- src/Columns/ColumnString.h | 2 +- src/Columns/ColumnTuple.cpp | 11 ++- src/Columns/ColumnTuple.h | 2 +- src/Columns/ColumnVariant.cpp | 27 ++----- src/Columns/ColumnVariant.h | 2 +- src/Columns/ColumnVector.cpp | 9 +-- src/Columns/ColumnVector.h | 2 +- src/Columns/IColumn.h | 4 +- src/Columns/IColumnDummy.h | 4 +- src/Columns/IColumnUnique.h | 5 +- src/Columns/tests/gtest_weak_hash_32.cpp | 81 +++++++------------ src/Common/WeakHash.cpp | 22 +++++ src/Common/WeakHash.h | 5 +- src/Interpreters/ConcurrentHashJoin.cpp | 2 +- src/Interpreters/JoinUtils.cpp | 2 +- .../ScatterByPartitionTransform.cpp | 2 +- 39 files changed, 150 insertions(+), 197 deletions(-) diff --git a/src/Columns/ColumnAggregateFunction.cpp b/src/Columns/ColumnAggregateFunction.cpp index cfd07c27765..33bd1266c90 100644 --- a/src/Columns/ColumnAggregateFunction.cpp +++ b/src/Columns/ColumnAggregateFunction.cpp @@ -366,13 +366,10 @@ void ColumnAggregateFunction::updateHashWithValue(size_t n, SipHash & hash) cons hash.update(wbuf.str().c_str(), wbuf.str().size()); } -void ColumnAggregateFunction::updateWeakHash32(WeakHash32 & hash) const +WeakHash32 ColumnAggregateFunction::getWeakHash32() const { auto s = data.size(); - if (hash.getData().size() != data.size()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of WeakHash32 does not match size of column: " - "column size is {}, hash size is {}", std::to_string(s), hash.getData().size()); - + WeakHash32 hash(s); auto & hash_data = hash.getData(); std::vector v; @@ -383,6 +380,8 @@ void ColumnAggregateFunction::updateWeakHash32(WeakHash32 & hash) const wbuf.finalize(); hash_data[i] = ::updateWeakHash32(v.data(), v.size(), hash_data[i]); } + + return hash; } void ColumnAggregateFunction::updateHashFast(SipHash & hash) const diff --git a/src/Columns/ColumnAggregateFunction.h b/src/Columns/ColumnAggregateFunction.h index 1be7a862438..330a707b75c 100644 --- a/src/Columns/ColumnAggregateFunction.h +++ b/src/Columns/ColumnAggregateFunction.h @@ -177,7 +177,7 @@ public: void updateHashWithValue(size_t n, SipHash & hash) const override; - void updateWeakHash32(WeakHash32 & hash) const override; + WeakHash32 getWeakHash32() const override; void updateHashFast(SipHash & hash) const override; diff --git a/src/Columns/ColumnArray.cpp b/src/Columns/ColumnArray.cpp index 5d7350f3a79..9203fb8042f 100644 --- a/src/Columns/ColumnArray.cpp +++ b/src/Columns/ColumnArray.cpp @@ -271,15 +271,12 @@ void ColumnArray::updateHashWithValue(size_t n, SipHash & hash) const getData().updateHashWithValue(offset + i, hash); } -void ColumnArray::updateWeakHash32(WeakHash32 & hash) const +WeakHash32 ColumnArray::getWeakHash32() const { auto s = offsets->size(); - if (hash.getData().size() != s) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of WeakHash32 does not match size of column: " - "column size is {}, hash size is {}", s, hash.getData().size()); + WeakHash32 hash(s); - WeakHash32 internal_hash(data->size()); - data->updateWeakHash32(internal_hash); + WeakHash32 internal_hash = data->getWeakHash32(); Offset prev_offset = 0; const auto & offsets_data = getOffsets(); @@ -300,6 +297,8 @@ void ColumnArray::updateWeakHash32(WeakHash32 & hash) const prev_offset = offsets_data[i]; } + + return hash; } void ColumnArray::updateHashFast(SipHash & hash) const diff --git a/src/Columns/ColumnArray.h b/src/Columns/ColumnArray.h index 6cd3e2f6c3b..5e01b9144d7 100644 --- a/src/Columns/ColumnArray.h +++ b/src/Columns/ColumnArray.h @@ -82,7 +82,7 @@ public: const char * deserializeAndInsertFromArena(const char * pos) override; const char * skipSerializedInArena(const char * pos) const override; void updateHashWithValue(size_t n, SipHash & hash) const override; - void updateWeakHash32(WeakHash32 & hash) const override; + WeakHash32 getWeakHash32() const override; void updateHashFast(SipHash & hash) const override; #if !defined(ABORT_ON_LOGICAL_ERROR) void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; diff --git a/src/Columns/ColumnCompressed.h b/src/Columns/ColumnCompressed.h index 5e455709fec..19470113394 100644 --- a/src/Columns/ColumnCompressed.h +++ b/src/Columns/ColumnCompressed.h @@ -3,6 +3,7 @@ #include #include #include +#include #include @@ -98,7 +99,7 @@ public: const char * deserializeAndInsertFromArena(const char *) override { throwMustBeDecompressed(); } const char * skipSerializedInArena(const char *) const override { throwMustBeDecompressed(); } void updateHashWithValue(size_t, SipHash &) const override { throwMustBeDecompressed(); } - void updateWeakHash32(WeakHash32 &) const override { throwMustBeDecompressed(); } + WeakHash32 getWeakHash32() const override { throwMustBeDecompressed(); } void updateHashFast(SipHash &) const override { throwMustBeDecompressed(); } ColumnPtr filter(const Filter &, ssize_t) const override { throwMustBeDecompressed(); } void expand(const Filter &, bool) override { throwMustBeDecompressed(); } diff --git a/src/Columns/ColumnConst.cpp b/src/Columns/ColumnConst.cpp index f2cea83db0e..84427e7be2b 100644 --- a/src/Columns/ColumnConst.cpp +++ b/src/Columns/ColumnConst.cpp @@ -137,18 +137,10 @@ void ColumnConst::updatePermutation(PermutationSortDirection /*direction*/, Perm { } -void ColumnConst::updateWeakHash32(WeakHash32 & hash) const +WeakHash32 ColumnConst::getWeakHash32() const { - if (hash.getData().size() != s) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of WeakHash32 does not match size of column: " - "column size is {}, hash size is {}", std::to_string(s), std::to_string(hash.getData().size())); - - WeakHash32 element_hash(1); - data->updateWeakHash32(element_hash); - size_t data_hash = element_hash.getData()[0]; - - for (auto & value : hash.getData()) - value = static_cast(intHashCRC32(data_hash, value)); + WeakHash32 element_hash = data->getWeakHash32(); + return WeakHash32(s, element_hash.getData()[0]); } void ColumnConst::compareColumn( diff --git a/src/Columns/ColumnConst.h b/src/Columns/ColumnConst.h index b55a1f42037..65ce53687b9 100644 --- a/src/Columns/ColumnConst.h +++ b/src/Columns/ColumnConst.h @@ -204,7 +204,7 @@ public: data->updateHashWithValue(0, hash); } - void updateWeakHash32(WeakHash32 & hash) const override; + WeakHash32 getWeakHash32() const override; void updateHashFast(SipHash & hash) const override { diff --git a/src/Columns/ColumnDecimal.cpp b/src/Columns/ColumnDecimal.cpp index cf413f790a7..ed9c699a841 100644 --- a/src/Columns/ColumnDecimal.cpp +++ b/src/Columns/ColumnDecimal.cpp @@ -76,13 +76,10 @@ void ColumnDecimal::updateHashWithValue(size_t n, SipHash & hash) const } template -void ColumnDecimal::updateWeakHash32(WeakHash32 & hash) const +WeakHash32 ColumnDecimal::getWeakHash32() const { auto s = data.size(); - - if (hash.getData().size() != s) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of WeakHash32 does not match size of column: " - "column size is {}, hash size is {}", std::to_string(s), std::to_string(hash.getData().size())); + WeakHash32 hash(s); const T * begin = data.data(); const T * end = begin + s; @@ -94,6 +91,8 @@ void ColumnDecimal::updateWeakHash32(WeakHash32 & hash) const ++begin; ++hash_data; } + + return hash; } template diff --git a/src/Columns/ColumnDecimal.h b/src/Columns/ColumnDecimal.h index 32efeb643a6..eb8a305a822 100644 --- a/src/Columns/ColumnDecimal.h +++ b/src/Columns/ColumnDecimal.h @@ -102,7 +102,7 @@ public: const char * deserializeAndInsertFromArena(const char * pos) override; const char * skipSerializedInArena(const char * pos) const override; void updateHashWithValue(size_t n, SipHash & hash) const override; - void updateWeakHash32(WeakHash32 & hash) const override; + WeakHash32 getWeakHash32() const override; void updateHashFast(SipHash & hash) const override; #if !defined(ABORT_ON_LOGICAL_ERROR) int compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override; diff --git a/src/Columns/ColumnDynamic.h b/src/Columns/ColumnDynamic.h index 9abddc7a26d..6f09abb945a 100644 --- a/src/Columns/ColumnDynamic.h +++ b/src/Columns/ColumnDynamic.h @@ -4,6 +4,7 @@ #include #include #include +#include namespace DB @@ -174,9 +175,9 @@ public: void updateHashWithValue(size_t n, SipHash & hash) const override; - void updateWeakHash32(WeakHash32 & hash) const override + WeakHash32 getWeakHash32() const override { - variant_column->updateWeakHash32(hash); + return variant_column->getWeakHash32(); } void updateHashFast(SipHash & hash) const override diff --git a/src/Columns/ColumnFixedString.cpp b/src/Columns/ColumnFixedString.cpp index 1c2de203a94..4d17eb0bebd 100644 --- a/src/Columns/ColumnFixedString.cpp +++ b/src/Columns/ColumnFixedString.cpp @@ -137,14 +137,10 @@ void ColumnFixedString::updateHashWithValue(size_t index, SipHash & hash) const hash.update(reinterpret_cast(&chars[n * index]), n); } -void ColumnFixedString::updateWeakHash32(WeakHash32 & hash) const +WeakHash32 ColumnFixedString::getWeakHash32() const { auto s = size(); - - if (hash.getData().size() != s) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of WeakHash32 does not match size of column: " - "column size is {}, " - "hash size is {}", std::to_string(s), std::to_string(hash.getData().size())); + WeakHash32 hash(s); const UInt8 * pos = chars.data(); UInt32 * hash_data = hash.getData().data(); @@ -156,6 +152,8 @@ void ColumnFixedString::updateWeakHash32(WeakHash32 & hash) const pos += n; ++hash_data; } + + return hash; } void ColumnFixedString::updateHashFast(SipHash & hash) const diff --git a/src/Columns/ColumnFixedString.h b/src/Columns/ColumnFixedString.h index 6e88136fc50..630c6c1c0a6 100644 --- a/src/Columns/ColumnFixedString.h +++ b/src/Columns/ColumnFixedString.h @@ -133,7 +133,7 @@ public: void updateHashWithValue(size_t index, SipHash & hash) const override; - void updateWeakHash32(WeakHash32 & hash) const override; + WeakHash32 getWeakHash32() const override; void updateHashFast(SipHash & hash) const override; diff --git a/src/Columns/ColumnFunction.h b/src/Columns/ColumnFunction.h index ba924c49a82..dcd67aecad7 100644 --- a/src/Columns/ColumnFunction.h +++ b/src/Columns/ColumnFunction.h @@ -4,6 +4,7 @@ #include #include #include +#include namespace DB @@ -130,9 +131,9 @@ public: throw Exception(ErrorCodes::NOT_IMPLEMENTED, "updateHashWithValue is not implemented for {}", getName()); } - void updateWeakHash32(WeakHash32 &) const override + WeakHash32 getWeakHash32() const override { - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "updateWeakHash32 is not implemented for {}", getName()); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "getWeakHash32 is not implemented for {}", getName()); } void updateHashFast(SipHash &) const override diff --git a/src/Columns/ColumnLowCardinality.cpp b/src/Columns/ColumnLowCardinality.cpp index eb694a10b0f..49ce948bf78 100644 --- a/src/Columns/ColumnLowCardinality.cpp +++ b/src/Columns/ColumnLowCardinality.cpp @@ -7,8 +7,7 @@ #include #include #include -#include "Storages/IndicesDescription.h" -#include "base/types.h" +#include #include #include @@ -320,19 +319,10 @@ const char * ColumnLowCardinality::skipSerializedInArena(const char * pos) const return getDictionary().skipSerializedInArena(pos); } -void ColumnLowCardinality::updateWeakHash32(WeakHash32 & hash) const +WeakHash32 ColumnLowCardinality::getWeakHash32() const { - auto s = size(); - - if (hash.getData().size() != s) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of WeakHash32 does not match size of column: " - "column size is {}, hash size is {}", std::to_string(s), std::to_string(hash.getData().size())); - - const auto & dict = getDictionary().getNestedColumn(); - WeakHash32 dict_hash(dict->size()); - dict->updateWeakHash32(dict_hash); - - idx.updateWeakHash(hash, dict_hash); + WeakHash32 dict_hash = getDictionary().getNestedColumn()->getWeakHash32(); + return idx.getWeakHash(dict_hash); } void ColumnLowCardinality::updateHashFast(SipHash & hash) const @@ -832,10 +822,11 @@ bool ColumnLowCardinality::Index::containsDefault() const return contains; } -void ColumnLowCardinality::Index::updateWeakHash(WeakHash32 & hash, WeakHash32 & dict_hash) const +WeakHash32 ColumnLowCardinality::Index::getWeakHash(const WeakHash32 & dict_hash) const { + WeakHash32 hash(positions->size()); auto & hash_data = hash.getData(); - auto & dict_hash_data = dict_hash.getData(); + const auto & dict_hash_data = dict_hash.getData(); auto update_weak_hash = [&](auto x) { @@ -844,10 +835,11 @@ void ColumnLowCardinality::Index::updateWeakHash(WeakHash32 & hash, WeakHash32 & auto size = data.size(); for (size_t i = 0; i < size; ++i) - hash_data[i] = static_cast(intHashCRC32(dict_hash_data[data[i]], hash_data[i])); + hash_data[i] = dict_hash_data[data[i]]; }; callForType(std::move(update_weak_hash), size_of_type); + return hash; } void ColumnLowCardinality::Index::collectSerializedValueSizes( diff --git a/src/Columns/ColumnLowCardinality.h b/src/Columns/ColumnLowCardinality.h index e99be07cd8d..fb0c1237fcf 100644 --- a/src/Columns/ColumnLowCardinality.h +++ b/src/Columns/ColumnLowCardinality.h @@ -111,7 +111,7 @@ public: getDictionary().updateHashWithValue(getIndexes().getUInt(n), hash); } - void updateWeakHash32(WeakHash32 & hash) const override; + WeakHash32 getWeakHash32() const override; void updateHashFast(SipHash &) const override; @@ -325,7 +325,7 @@ public: bool containsDefault() const; - void updateWeakHash(WeakHash32 & hash, WeakHash32 & dict_hash) const; + WeakHash32 getWeakHash(const WeakHash32 & dict_hash) const; void collectSerializedValueSizes(PaddedPODArray & sizes, const PaddedPODArray & dict_sizes) const; diff --git a/src/Columns/ColumnMap.cpp b/src/Columns/ColumnMap.cpp index 2dffddb2dc9..08d7734ac6b 100644 --- a/src/Columns/ColumnMap.cpp +++ b/src/Columns/ColumnMap.cpp @@ -143,9 +143,9 @@ void ColumnMap::updateHashWithValue(size_t n, SipHash & hash) const nested->updateHashWithValue(n, hash); } -void ColumnMap::updateWeakHash32(WeakHash32 & hash) const +WeakHash32 ColumnMap::getWeakHash32() const { - nested->updateWeakHash32(hash); + return nested->getWeakHash32(); } void ColumnMap::updateHashFast(SipHash & hash) const diff --git a/src/Columns/ColumnMap.h b/src/Columns/ColumnMap.h index a54071a2974..29275e1b5f7 100644 --- a/src/Columns/ColumnMap.h +++ b/src/Columns/ColumnMap.h @@ -64,7 +64,7 @@ public: const char * deserializeAndInsertFromArena(const char * pos) override; const char * skipSerializedInArena(const char * pos) const override; void updateHashWithValue(size_t n, SipHash & hash) const override; - void updateWeakHash32(WeakHash32 & hash) const override; + WeakHash32 getWeakHash32() const override; void updateHashFast(SipHash & hash) const override; #if !defined(ABORT_ON_LOGICAL_ERROR) diff --git a/src/Columns/ColumnNullable.cpp b/src/Columns/ColumnNullable.cpp index f060e74b315..64e99a3bbe8 100644 --- a/src/Columns/ColumnNullable.cpp +++ b/src/Columns/ColumnNullable.cpp @@ -56,25 +56,21 @@ void ColumnNullable::updateHashWithValue(size_t n, SipHash & hash) const getNestedColumn().updateHashWithValue(n, hash); } -void ColumnNullable::updateWeakHash32(WeakHash32 & hash) const +WeakHash32 ColumnNullable::getWeakHash32() const { auto s = size(); - if (hash.getData().size() != s) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of WeakHash32 does not match size of column: " - "column size is {}, hash size is {}", std::to_string(s), std::to_string(hash.getData().size())); - - WeakHash32 old_hash = hash; - nested_column->updateWeakHash32(hash); + WeakHash32 hash = nested_column->getWeakHash32(); const auto & null_map_data = getNullMapData(); auto & hash_data = hash.getData(); - auto & old_hash_data = old_hash.getData(); - /// Use old data for nulls. + /// Use defualt for nulls. for (size_t row = 0; row < s; ++row) if (null_map_data[row]) - hash_data[row] = old_hash_data[row]; + hash_data[row] = WeakHash32::kDefaultInitialValue; + + return hash; } void ColumnNullable::updateHashFast(SipHash & hash) const diff --git a/src/Columns/ColumnNullable.h b/src/Columns/ColumnNullable.h index a6d0483e527..15bbd8c3b57 100644 --- a/src/Columns/ColumnNullable.h +++ b/src/Columns/ColumnNullable.h @@ -133,7 +133,7 @@ public: void protect() override; ColumnPtr replicate(const Offsets & replicate_offsets) const override; void updateHashWithValue(size_t n, SipHash & hash) const override; - void updateWeakHash32(WeakHash32 & hash) const override; + WeakHash32 getWeakHash32() const override; void updateHashFast(SipHash & hash) const override; void getExtremes(Field & min, Field & max) const override; // Special function for nullable minmax index diff --git a/src/Columns/ColumnObject.h b/src/Columns/ColumnObject.h index fadf2e18779..21607e003f2 100644 --- a/src/Columns/ColumnObject.h +++ b/src/Columns/ColumnObject.h @@ -5,6 +5,7 @@ #include #include #include +#include #include @@ -252,7 +253,7 @@ public: const char * deserializeAndInsertFromArena(const char *) override { throwMustBeConcrete(); } const char * skipSerializedInArena(const char *) const override { throwMustBeConcrete(); } void updateHashWithValue(size_t, SipHash &) const override { throwMustBeConcrete(); } - void updateWeakHash32(WeakHash32 &) const override { throwMustBeConcrete(); } + WeakHash32 getWeakHash32() const override { throwMustBeConcrete(); } void updateHashFast(SipHash &) const override { throwMustBeConcrete(); } void expand(const Filter &, bool) override { throwMustBeConcrete(); } bool hasEqualValues() const override { throwMustBeConcrete(); } diff --git a/src/Columns/ColumnSparse.cpp b/src/Columns/ColumnSparse.cpp index 98a66e87387..0d103a263dd 100644 --- a/src/Columns/ColumnSparse.cpp +++ b/src/Columns/ColumnSparse.cpp @@ -678,26 +678,13 @@ void ColumnSparse::updateHashWithValue(size_t n, SipHash & hash) const values->updateHashWithValue(getValueIndex(n), hash); } -void ColumnSparse::updateWeakHash32(WeakHash32 & hash) const +WeakHash32 ColumnSparse::getWeakHash32() const { - if (hash.getData().size() != _size) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of WeakHash32 does not match size of column: " - "column size is {}, hash size is {}", _size, hash.getData().size()); - - size_t values_size = values->size(); - WeakHash32 values_hash(values_size); + WeakHash32 values_hash = values->getWeakHash32(); + WeakHash32 hash(size()); auto & hash_data = hash.getData(); auto & values_hash_data = values_hash.getData(); - const auto & offsets_data = getOffsetsData(); - - if (getNumberOfDefaultRows() > 0) - values_hash_data[0] = hash_data[getFirstDefaultValueIndex()]; - - for (size_t i = 0; i + 1 < values_size; ++i) - values_hash_data[i + 1] = hash_data[offsets_data[i]]; - - values->updateWeakHash32(values_hash); auto offset_it = begin(); for (size_t i = 0; i < _size; ++i, ++offset_it) @@ -705,6 +692,8 @@ void ColumnSparse::updateWeakHash32(WeakHash32 & hash) const size_t value_index = offset_it.getValueIndex(); hash_data[i] = values_hash_data[value_index]; } + + return hash; } void ColumnSparse::updateHashFast(SipHash & hash) const diff --git a/src/Columns/ColumnSparse.h b/src/Columns/ColumnSparse.h index 4860f5171f7..a5d4d788b17 100644 --- a/src/Columns/ColumnSparse.h +++ b/src/Columns/ColumnSparse.h @@ -139,7 +139,7 @@ public: void protect() override; ColumnPtr replicate(const Offsets & replicate_offsets) const override; void updateHashWithValue(size_t n, SipHash & hash) const override; - void updateWeakHash32(WeakHash32 & hash) const override; + WeakHash32 getWeakHash32() const override; void updateHashFast(SipHash & hash) const override; void getExtremes(Field & min, Field & max) const override; diff --git a/src/Columns/ColumnString.cpp b/src/Columns/ColumnString.cpp index 1eda9714d62..4accfbe8f41 100644 --- a/src/Columns/ColumnString.cpp +++ b/src/Columns/ColumnString.cpp @@ -108,13 +108,10 @@ MutableColumnPtr ColumnString::cloneResized(size_t to_size) const return res; } -void ColumnString::updateWeakHash32(WeakHash32 & hash) const +WeakHash32 ColumnString::getWeakHash32() const { auto s = offsets.size(); - - if (hash.getData().size() != s) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of WeakHash32 does not match size of column: " - "column size is {}, hash size is {}", std::to_string(s), std::to_string(hash.getData().size())); + WeakHash32 hash(s); const UInt8 * pos = chars.data(); UInt32 * hash_data = hash.getData().data(); @@ -130,6 +127,8 @@ void ColumnString::updateWeakHash32(WeakHash32 & hash) const prev_offset = offset; ++hash_data; } + + return hash; } diff --git a/src/Columns/ColumnString.h b/src/Columns/ColumnString.h index 602ffac65e8..faaaa8848ca 100644 --- a/src/Columns/ColumnString.h +++ b/src/Columns/ColumnString.h @@ -212,7 +212,7 @@ public: hash.update(reinterpret_cast(&chars[offset]), string_size); } - void updateWeakHash32(WeakHash32 & hash) const override; + WeakHash32 getWeakHash32() const override; void updateHashFast(SipHash & hash) const override { diff --git a/src/Columns/ColumnTuple.cpp b/src/Columns/ColumnTuple.cpp index 9b822d7f570..cb0b05d2154 100644 --- a/src/Columns/ColumnTuple.cpp +++ b/src/Columns/ColumnTuple.cpp @@ -308,16 +308,15 @@ void ColumnTuple::updateHashWithValue(size_t n, SipHash & hash) const column->updateHashWithValue(n, hash); } -void ColumnTuple::updateWeakHash32(WeakHash32 & hash) const +WeakHash32 ColumnTuple::getWeakHash32() const { auto s = size(); - - if (hash.getData().size() != s) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of WeakHash32 does not match size of column: " - "column size is {}, hash size is {}", std::to_string(s), std::to_string(hash.getData().size())); + WeakHash32 hash(s); for (const auto & column : columns) - column->updateWeakHash32(hash); + hash.update(column->getWeakHash32()); + + return hash; } void ColumnTuple::updateHashFast(SipHash & hash) const diff --git a/src/Columns/ColumnTuple.h b/src/Columns/ColumnTuple.h index 38e479791d4..2fafd93f776 100644 --- a/src/Columns/ColumnTuple.h +++ b/src/Columns/ColumnTuple.h @@ -81,7 +81,7 @@ public: const char * deserializeAndInsertFromArena(const char * pos) override; const char * skipSerializedInArena(const char * pos) const override; void updateHashWithValue(size_t n, SipHash & hash) const override; - void updateWeakHash32(WeakHash32 & hash) const override; + WeakHash32 getWeakHash32() const override; void updateHashFast(SipHash & hash) const override; #if !defined(ABORT_ON_LOGICAL_ERROR) void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; diff --git a/src/Columns/ColumnVariant.cpp b/src/Columns/ColumnVariant.cpp index ee5de4c2dde..8fd6e1bbac1 100644 --- a/src/Columns/ColumnVariant.cpp +++ b/src/Columns/ColumnVariant.cpp @@ -789,36 +789,26 @@ void ColumnVariant::updateHashWithValue(size_t n, SipHash & hash) const variants[localDiscriminatorByGlobal(global_discr)]->updateHashWithValue(offsetAt(n), hash); } -void ColumnVariant::updateWeakHash32(WeakHash32 & hash) const +WeakHash32 ColumnVariant::getWeakHash32() const { auto s = size(); - if (hash.getData().size() != s) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of WeakHash32 does not match size of column: " - "column size is {}, hash size is {}", std::to_string(s), std::to_string(hash.getData().size())); - /// If we have only NULLs, keep hash unchanged. if (hasOnlyNulls()) - return; + return WeakHash32(s); /// Optimization for case when there is only 1 non-empty variant and no NULLs. /// In this case we can just calculate weak hash for this variant. if (auto non_empty_local_discr = getLocalDiscriminatorOfOneNoneEmptyVariantNoNulls()) - { - variants[*non_empty_local_discr]->updateWeakHash32(hash); - return; - } + return variants[*non_empty_local_discr]->getWeakHash32(); /// Calculate weak hash for all variants. std::vector nested_hashes; for (const auto & variant : variants) - { - WeakHash32 nested_hash(variant->size()); - variant->updateWeakHash32(nested_hash); - nested_hashes.emplace_back(std::move(nested_hash)); - } + nested_hashes.emplace_back(variant->getWeakHash32()); /// For each row hash is a hash of corresponding row from corresponding variant. + WeakHash32 hash(s); auto & hash_data = hash.getData(); const auto & local_discriminators_data = getLocalDiscriminators(); const auto & offsets_data = getOffsets(); @@ -827,11 +817,10 @@ void ColumnVariant::updateWeakHash32(WeakHash32 & hash) const Discriminator discr = local_discriminators_data[i]; /// Update hash only for non-NULL values if (discr != NULL_DISCRIMINATOR) - { - auto nested_hash = nested_hashes[local_discriminators_data[i]].getData()[offsets_data[i]]; - hash_data[i] = static_cast(hashCRC32(nested_hash, hash_data[i])); - } + hash_data[i] = nested_hashes[discr].getData()[offsets_data[i]]; } + + return hash; } void ColumnVariant::updateHashFast(SipHash & hash) const diff --git a/src/Columns/ColumnVariant.h b/src/Columns/ColumnVariant.h index d91b8e93a7d..94f3066e676 100644 --- a/src/Columns/ColumnVariant.h +++ b/src/Columns/ColumnVariant.h @@ -213,7 +213,7 @@ public: const char * deserializeVariantAndInsertFromArena(Discriminator global_discr, const char * pos); const char * skipSerializedInArena(const char * pos) const override; void updateHashWithValue(size_t n, SipHash & hash) const override; - void updateWeakHash32(WeakHash32 & hash) const override; + WeakHash32 getWeakHash32() const override; void updateHashFast(SipHash & hash) const override; ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override; void expand(const Filter & mask, bool inverted) override; diff --git a/src/Columns/ColumnVector.cpp b/src/Columns/ColumnVector.cpp index 19d1b800961..185a1e0f615 100644 --- a/src/Columns/ColumnVector.cpp +++ b/src/Columns/ColumnVector.cpp @@ -73,13 +73,10 @@ void ColumnVector::updateHashWithValue(size_t n, SipHash & hash) const } template -void ColumnVector::updateWeakHash32(WeakHash32 & hash) const +WeakHash32 ColumnVector::getWeakHash32() const { auto s = data.size(); - - if (hash.getData().size() != s) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of WeakHash32 does not match size of column: " - "column size is {}, hash size is {}", std::to_string(s), std::to_string(hash.getData().size())); + WeakHash32 hash(s); const T * begin = data.data(); const T * end = begin + s; @@ -91,6 +88,8 @@ void ColumnVector::updateWeakHash32(WeakHash32 & hash) const ++begin; ++hash_data; } + + return hash; } template diff --git a/src/Columns/ColumnVector.h b/src/Columns/ColumnVector.h index 3a0acf5e312..c01778ecf32 100644 --- a/src/Columns/ColumnVector.h +++ b/src/Columns/ColumnVector.h @@ -114,7 +114,7 @@ public: void updateHashWithValue(size_t n, SipHash & hash) const override; - void updateWeakHash32(WeakHash32 & hash) const override; + WeakHash32 getWeakHash32() const override; void updateHashFast(SipHash & hash) const override; diff --git a/src/Columns/IColumn.h b/src/Columns/IColumn.h index 4b6f34e5aa2..3798d3b7466 100644 --- a/src/Columns/IColumn.h +++ b/src/Columns/IColumn.h @@ -300,10 +300,10 @@ public: /// passed bytes to hash must identify sequence of values unambiguously. virtual void updateHashWithValue(size_t n, SipHash & hash) const = 0; - /// Update hash function value. Hash is calculated for each element. + /// Get hash function value. Hash is calculated for each element. /// It's a fast weak hash function. Mainly need to scatter data between threads. /// WeakHash32 must have the same size as column. - virtual void updateWeakHash32(WeakHash32 & hash) const = 0; + virtual WeakHash32 getWeakHash32() const = 0; /// Update state of hash with all column. virtual void updateHashFast(SipHash & hash) const = 0; diff --git a/src/Columns/IColumnDummy.h b/src/Columns/IColumnDummy.h index c19fb704d9b..b18f4fdb302 100644 --- a/src/Columns/IColumnDummy.h +++ b/src/Columns/IColumnDummy.h @@ -1,6 +1,7 @@ #pragma once #include +#include namespace DB @@ -63,8 +64,9 @@ public: { } - void updateWeakHash32(WeakHash32 & /*hash*/) const override + WeakHash32 getWeakHash32() const override { + return WeakHash32(s); } void updateHashFast(SipHash & /*hash*/) const override diff --git a/src/Columns/IColumnUnique.h b/src/Columns/IColumnUnique.h index 3398452b7ee..1b86204f5b1 100644 --- a/src/Columns/IColumnUnique.h +++ b/src/Columns/IColumnUnique.h @@ -1,6 +1,7 @@ #pragma once #include #include +#include namespace DB { @@ -166,9 +167,9 @@ public: throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method scatter is not supported for ColumnUnique."); } - void updateWeakHash32(WeakHash32 &) const override + WeakHash32 getWeakHash32() const override { - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method updateWeakHash32 is not supported for ColumnUnique."); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method getWeakHash32 is not supported for ColumnUnique."); } void updateHashFast(SipHash &) const override diff --git a/src/Columns/tests/gtest_weak_hash_32.cpp b/src/Columns/tests/gtest_weak_hash_32.cpp index 2c95998761b..3143d0ff83c 100644 --- a/src/Columns/tests/gtest_weak_hash_32.cpp +++ b/src/Columns/tests/gtest_weak_hash_32.cpp @@ -60,8 +60,7 @@ TEST(WeakHash32, ColumnVectorU8) data.push_back(i); } - WeakHash32 hash(col->size()); - col->updateWeakHash32(hash); + WeakHash32 hash = col->getWeakHash32(); checkColumn(hash.getData(), col->getData()); } @@ -77,8 +76,7 @@ TEST(WeakHash32, ColumnVectorI8) data.push_back(i); } - WeakHash32 hash(col->size()); - col->updateWeakHash32(hash); + WeakHash32 hash = col->getWeakHash32(); checkColumn(hash.getData(), col->getData()); } @@ -94,8 +92,7 @@ TEST(WeakHash32, ColumnVectorU16) data.push_back(i); } - WeakHash32 hash(col->size()); - col->updateWeakHash32(hash); + WeakHash32 hash = col->getWeakHash32(); checkColumn(hash.getData(), col->getData()); } @@ -111,8 +108,7 @@ TEST(WeakHash32, ColumnVectorI16) data.push_back(i); } - WeakHash32 hash(col->size()); - col->updateWeakHash32(hash); + WeakHash32 hash = col->getWeakHash32(); checkColumn(hash.getData(), col->getData()); } @@ -128,8 +124,7 @@ TEST(WeakHash32, ColumnVectorU32) data.push_back(i << 16u); } - WeakHash32 hash(col->size()); - col->updateWeakHash32(hash); + WeakHash32 hash = col->getWeakHash32(); checkColumn(hash.getData(), col->getData()); } @@ -145,8 +140,7 @@ TEST(WeakHash32, ColumnVectorI32) data.push_back(i << 16); } - WeakHash32 hash(col->size()); - col->updateWeakHash32(hash); + WeakHash32 hash = col->getWeakHash32(); checkColumn(hash.getData(), col->getData()); } @@ -162,8 +156,7 @@ TEST(WeakHash32, ColumnVectorU64) data.push_back(i << 32u); } - WeakHash32 hash(col->size()); - col->updateWeakHash32(hash); + WeakHash32 hash = col->getWeakHash32(); checkColumn(hash.getData(), col->getData()); } @@ -179,8 +172,7 @@ TEST(WeakHash32, ColumnVectorI64) data.push_back(i << 32); } - WeakHash32 hash(col->size()); - col->updateWeakHash32(hash); + WeakHash32 hash = col->getWeakHash32(); checkColumn(hash.getData(), col->getData()); } @@ -204,8 +196,7 @@ TEST(WeakHash32, ColumnVectorU128) } } - WeakHash32 hash(col->size()); - col->updateWeakHash32(hash); + WeakHash32 hash = col->getWeakHash32(); checkColumn(hash.getData(), eq_data); } @@ -221,8 +212,7 @@ TEST(WeakHash32, ColumnVectorI128) data.push_back(i << 32); } - WeakHash32 hash(col->size()); - col->updateWeakHash32(hash); + WeakHash32 hash = col->getWeakHash32(); checkColumn(hash.getData(), col->getData()); } @@ -238,8 +228,7 @@ TEST(WeakHash32, ColumnDecimal32) data.push_back(i << 16); } - WeakHash32 hash(col->size()); - col->updateWeakHash32(hash); + WeakHash32 hash = col->getWeakHash32(); checkColumn(hash.getData(), col->getData()); } @@ -255,8 +244,7 @@ TEST(WeakHash32, ColumnDecimal64) data.push_back(i << 32); } - WeakHash32 hash(col->size()); - col->updateWeakHash32(hash); + WeakHash32 hash = col->getWeakHash32(); checkColumn(hash.getData(), col->getData()); } @@ -272,8 +260,7 @@ TEST(WeakHash32, ColumnDecimal128) data.push_back(i << 32); } - WeakHash32 hash(col->size()); - col->updateWeakHash32(hash); + WeakHash32 hash = col->getWeakHash32(); checkColumn(hash.getData(), col->getData()); } @@ -294,8 +281,7 @@ TEST(WeakHash32, ColumnString1) } } - WeakHash32 hash(col->size()); - col->updateWeakHash32(hash); + WeakHash32 hash = col->getWeakHash32(); checkColumn(hash.getData(), data); } @@ -331,8 +317,7 @@ TEST(WeakHash32, ColumnString2) } } - WeakHash32 hash(col->size()); - col->updateWeakHash32(hash); + WeakHash32 hash = col->getWeakHash32(); checkColumn(hash.getData(), data); } @@ -369,8 +354,7 @@ TEST(WeakHash32, ColumnString3) } } - WeakHash32 hash(col->size()); - col->updateWeakHash32(hash); + WeakHash32 hash = col->getWeakHash32(); checkColumn(hash.getData(), data); } @@ -397,8 +381,7 @@ TEST(WeakHash32, ColumnFixedString) } } - WeakHash32 hash(col->size()); - col->updateWeakHash32(hash); + WeakHash32 hash = col->getWeakHash32(); checkColumn(hash.getData(), data); } @@ -444,8 +427,7 @@ TEST(WeakHash32, ColumnArray) auto col_arr = ColumnArray::create(std::move(val), std::move(off)); - WeakHash32 hash(col_arr->size()); - col_arr->updateWeakHash32(hash); + WeakHash32 hash = col_arr->getWeakHash32(); checkColumn(hash.getData(), eq_data); } @@ -479,8 +461,7 @@ TEST(WeakHash32, ColumnArray2) auto col_arr = ColumnArray::create(std::move(val), std::move(off)); - WeakHash32 hash(col_arr->size()); - col_arr->updateWeakHash32(hash); + WeakHash32 hash = col_arr->getWeakHash32(); checkColumn(hash.getData(), eq_data); } @@ -536,8 +517,7 @@ TEST(WeakHash32, ColumnArrayArray) auto col_arr = ColumnArray::create(std::move(val), std::move(off)); auto col_arr_arr = ColumnArray::create(std::move(col_arr), std::move(off2)); - WeakHash32 hash(col_arr_arr->size()); - col_arr_arr->updateWeakHash32(hash); + WeakHash32 hash = col_arr_arr->getWeakHash32(); checkColumn(hash.getData(), eq_data); } @@ -555,8 +535,7 @@ TEST(WeakHash32, ColumnConst) auto col_const = ColumnConst::create(std::move(inner_col), 256); - WeakHash32 hash(col_const->size()); - col_const->updateWeakHash32(hash); + WeakHash32 hash = col_const->getWeakHash32(); checkColumn(hash.getData(), data); } @@ -576,8 +555,7 @@ TEST(WeakHash32, ColumnLowcardinality) } } - WeakHash32 hash(col->size()); - col->updateWeakHash32(hash); + WeakHash32 hash = col->getWeakHash32(); checkColumn(hash.getData(), data); } @@ -602,8 +580,7 @@ TEST(WeakHash32, ColumnNullable) auto col_null = ColumnNullable::create(std::move(col), std::move(mask)); - WeakHash32 hash(col_null->size()); - col_null->updateWeakHash32(hash); + WeakHash32 hash = col_null->getWeakHash32(); checkColumn(hash.getData(), eq); } @@ -633,8 +610,7 @@ TEST(WeakHash32, ColumnTupleUInt64UInt64) columns.emplace_back(std::move(col2)); auto col_tuple = ColumnTuple::create(std::move(columns)); - WeakHash32 hash(col_tuple->size()); - col_tuple->updateWeakHash32(hash); + WeakHash32 hash = col_tuple->getWeakHash32(); checkColumn(hash.getData(), eq); } @@ -671,8 +647,7 @@ TEST(WeakHash32, ColumnTupleUInt64String) columns.emplace_back(std::move(col2)); auto col_tuple = ColumnTuple::create(std::move(columns)); - WeakHash32 hash(col_tuple->size()); - col_tuple->updateWeakHash32(hash); + WeakHash32 hash = col_tuple->getWeakHash32(); checkColumn(hash.getData(), eq); } @@ -709,8 +684,7 @@ TEST(WeakHash32, ColumnTupleUInt64FixedString) columns.emplace_back(std::move(col2)); auto col_tuple = ColumnTuple::create(std::move(columns)); - WeakHash32 hash(col_tuple->size()); - col_tuple->updateWeakHash32(hash); + WeakHash32 hash = col_tuple->getWeakHash32(); checkColumn(hash.getData(), eq); } @@ -756,8 +730,7 @@ TEST(WeakHash32, ColumnTupleUInt64Array) columns.emplace_back(ColumnArray::create(std::move(val), std::move(off))); auto col_tuple = ColumnTuple::create(std::move(columns)); - WeakHash32 hash(col_tuple->size()); - col_tuple->updateWeakHash32(hash); + WeakHash32 hash = col_tuple->getWeakHash32(); checkColumn(hash.getData(), eq_data); } diff --git a/src/Common/WeakHash.cpp b/src/Common/WeakHash.cpp index 54d973b6296..cb12df84db1 100644 --- a/src/Common/WeakHash.cpp +++ b/src/Common/WeakHash.cpp @@ -1,2 +1,24 @@ #include +#include +#include +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + +void WeakHash32::update(const WeakHash32 & other) +{ + size_t size = data.size(); + if (size != other.data.size()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of WeakHash32 does not match:" + "left size is {}, right size is {}", size, other.data.size()); + + for (size_t i = 0; i < size; ++i) + data[i] = static_cast(intHashCRC32(other.data[i], data[i])); +} + +} diff --git a/src/Common/WeakHash.h b/src/Common/WeakHash.h index b59624e64f2..d4a8d63868c 100644 --- a/src/Common/WeakHash.h +++ b/src/Common/WeakHash.h @@ -11,9 +11,8 @@ namespace DB /// The main purpose why this class needed is to support data initialization. Initially, every bit is 1. class WeakHash32 { - static constexpr UInt32 kDefaultInitialValue = ~UInt32(0); - public: + static constexpr UInt32 kDefaultInitialValue = ~UInt32(0); using Container = PaddedPODArray; @@ -22,6 +21,8 @@ public: void reset(size_t size, UInt32 initial_value = kDefaultInitialValue) { data.assign(size, initial_value); } + void update(const WeakHash32 & other); + const Container & getData() const { return data; } Container & getData() { return data; } diff --git a/src/Interpreters/ConcurrentHashJoin.cpp b/src/Interpreters/ConcurrentHashJoin.cpp index 4493a9f4dbd..ac940c62a1a 100644 --- a/src/Interpreters/ConcurrentHashJoin.cpp +++ b/src/Interpreters/ConcurrentHashJoin.cpp @@ -310,7 +310,7 @@ IColumn::Selector ConcurrentHashJoin::selectDispatchBlock(const Strings & key_co { const auto & key_col = from_block.getByName(key_name).column->convertToFullColumnIfConst(); const auto & key_col_no_lc = recursiveRemoveLowCardinality(recursiveRemoveSparse(key_col)); - key_col_no_lc->updateWeakHash32(hash); + hash.update(key_col_no_lc->getWeakHash32()); } return hashToSelector(hash, num_shards); } diff --git a/src/Interpreters/JoinUtils.cpp b/src/Interpreters/JoinUtils.cpp index 1788c9aca48..180a45d4295 100644 --- a/src/Interpreters/JoinUtils.cpp +++ b/src/Interpreters/JoinUtils.cpp @@ -554,7 +554,7 @@ static Blocks scatterBlockByHashImpl(const Strings & key_columns_names, const Bl for (const auto & key_name : key_columns_names) { ColumnPtr key_col = materializeColumn(block, key_name); - key_col->updateWeakHash32(hash); + hash.update(key_col->getWeakHash32()); } auto selector = hashToSelector(hash, sharder); diff --git a/src/Processors/Transforms/ScatterByPartitionTransform.cpp b/src/Processors/Transforms/ScatterByPartitionTransform.cpp index 6e3cdc0fda1..16d265c9bcb 100644 --- a/src/Processors/Transforms/ScatterByPartitionTransform.cpp +++ b/src/Processors/Transforms/ScatterByPartitionTransform.cpp @@ -109,7 +109,7 @@ void ScatterByPartitionTransform::generateOutputChunks() hash.reset(num_rows); for (const auto & column_number : key_columns) - columns[column_number]->updateWeakHash32(hash); + hash.update(columns[column_number]->getWeakHash32()); const auto & hash_data = hash.getData(); IColumn::Selector selector(num_rows); From 05874d0b85d78c1067c1db5332c7cc74b94d88cd Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Wed, 17 Jul 2024 15:37:55 +0000 Subject: [PATCH 16/23] Fixing style. --- src/Columns/ColumnDecimal.cpp | 1 - src/Columns/ColumnNullable.cpp | 2 +- src/Columns/ColumnSparse.cpp | 18 ------------------ src/Columns/ColumnSparse.h | 3 --- 4 files changed, 1 insertion(+), 23 deletions(-) diff --git a/src/Columns/ColumnDecimal.cpp b/src/Columns/ColumnDecimal.cpp index ed9c699a841..e27807950ae 100644 --- a/src/Columns/ColumnDecimal.cpp +++ b/src/Columns/ColumnDecimal.cpp @@ -28,7 +28,6 @@ namespace ErrorCodes extern const int PARAMETER_OUT_OF_BOUND; extern const int SIZES_OF_COLUMNS_DOESNT_MATCH; extern const int NOT_IMPLEMENTED; - extern const int LOGICAL_ERROR; } template diff --git a/src/Columns/ColumnNullable.cpp b/src/Columns/ColumnNullable.cpp index 64e99a3bbe8..867c9149242 100644 --- a/src/Columns/ColumnNullable.cpp +++ b/src/Columns/ColumnNullable.cpp @@ -65,7 +65,7 @@ WeakHash32 ColumnNullable::getWeakHash32() const const auto & null_map_data = getNullMapData(); auto & hash_data = hash.getData(); - /// Use defualt for nulls. + /// Use default for nulls. for (size_t row = 0; row < s; ++row) if (null_map_data[row]) hash_data[row] = WeakHash32::kDefaultInitialValue; diff --git a/src/Columns/ColumnSparse.cpp b/src/Columns/ColumnSparse.cpp index 0d103a263dd..8f98a4433d3 100644 --- a/src/Columns/ColumnSparse.cpp +++ b/src/Columns/ColumnSparse.cpp @@ -809,24 +809,6 @@ size_t ColumnSparse::getValueIndex(size_t n) const return it - offsets_data.begin() + 1; } -size_t ColumnSparse::getFirstDefaultValueIndex() const -{ - if (getNumberOfDefaultRows() == 0) - return size(); - - const auto & offsets_data = getOffsetsData(); - size_t off_size = offsets_data.size(); - - if (off_size == 0 || offsets_data[0] > 0) - return 0; - - size_t idx = 0; - while (idx + 1 < off_size && offsets_data[idx] + 1 == offsets_data[idx + 1]) - ++idx; - - return offsets_data[idx] + 1; -} - ColumnSparse::Iterator ColumnSparse::getIterator(size_t n) const { const auto & offsets_data = getOffsetsData(); diff --git a/src/Columns/ColumnSparse.h b/src/Columns/ColumnSparse.h index a5d4d788b17..392a6910956 100644 --- a/src/Columns/ColumnSparse.h +++ b/src/Columns/ColumnSparse.h @@ -173,9 +173,6 @@ public: /// O(log(offsets.size())) complexity, size_t getValueIndex(size_t n) const; - /// Returns an index of the first default value, or size() if there is no defaults. - size_t getFirstDefaultValueIndex() const; - const IColumn & getValuesColumn() const { return *values; } IColumn & getValuesColumn() { return *values; } From 87fafaa9f5f8406228440631acf597c782a3ecdd Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 18 Jul 2024 11:00:50 +0000 Subject: [PATCH 17/23] Remove flaky case from the test. --- ..._finctions_and_column_sparse_bug.reference | 73 ------------------- ...window_finctions_and_column_sparse_bug.sql | 54 -------------- 2 files changed, 127 deletions(-) diff --git a/tests/queries/0_stateless/03205_parallel_window_finctions_and_column_sparse_bug.reference b/tests/queries/0_stateless/03205_parallel_window_finctions_and_column_sparse_bug.reference index 356329a392d..13e229432ae 100644 --- a/tests/queries/0_stateless/03205_parallel_window_finctions_and_column_sparse_bug.reference +++ b/tests/queries/0_stateless/03205_parallel_window_finctions_and_column_sparse_bug.reference @@ -8,76 +8,3 @@ true 1 1 true 1 2 --- -755809149 0 ---- -1 -2081147898 -1 -1981899149 -2 -2051538534 -2 -1650266905 -3 -1975508531 -3 -1646738223 -4 -1700730666 -4 -1618912877 -5 -1465484835 -5 -1317193174 -6 -1458338029 -6 -1219769753 -7 -1450619195 -7 -1154269118 -8 -1365934326 -8 -1150980622 -9 -1203382363 -9 -1098155311 -10 -1197430632 -10 -841067875 -11 -1176267855 -11 -816935497 -12 -1020892864 -12 -599948807 -13 -991301833 -13 -526570556 -14 -685902265 -14 -504713125 -15 -653505826 -15 -411038390 -16 -451392958 -16 -331834394 -17 -262516786 -17 -176934810 -18 -222873194 -18 -2 -19 -153185515 -19 6 -20 -74234560 -20 255 -21 -41 -21 406615258 -22 -6 -22 541758331 -23 -5 -23 720985423 -24 -3 -24 745669725 -25 15 -25 897064234 -26 65535 -26 1116921321 -27 77089559 -27 1207796283 -28 100663045 -28 1603772265 -29 561061873 -29 1664059402 -30 643897141 -30 1688303275 -31 914629990 -31 1913361922 -32 1159852204 -32 1929066636 -33 1258218855 -33 1968095908 -34 1459407556 -34 2054878592 -35 1936334332 -35 2125075305 -36 1962246186 -37 2030467062 diff --git a/tests/queries/0_stateless/03205_parallel_window_finctions_and_column_sparse_bug.sql b/tests/queries/0_stateless/03205_parallel_window_finctions_and_column_sparse_bug.sql index 6e326d0a67f..f2391e0d165 100644 --- a/tests/queries/0_stateless/03205_parallel_window_finctions_and_column_sparse_bug.sql +++ b/tests/queries/0_stateless/03205_parallel_window_finctions_and_column_sparse_bug.sql @@ -31,57 +31,3 @@ SELECT * FROM ( SELECT c, min(w) OVER (PARTITION BY s ORDER BY c ASC, s ASC, w ASC) FROM t limit toUInt64(-1)) WHERE c = -755809149; - -SELECT '---'; - -create table t_vkx4cc ( - c_ylzjpt Int32, - c_hqfr9 Bool , - ) engine = MergeTree order by c_ylzjpt; - -system stop merges t_vkx4cc; - -insert into t_vkx4cc (c_ylzjpt, c_hqfr9) values (-2081147898, coalesce((NOT NOT(cast( (53 < 539704722) as Nullable(Bool)))), true)), (-1219769753, coalesce((true) and (false), false)), (-1981899149, coalesce(false, false)), (-1646738223, coalesce((NOT NOT(cast( (23.5 <= -26) as Nullable(Bool)))), false)); - -insert into t_vkx4cc (c_ylzjpt, c_hqfr9) values (255, coalesce(false, false)), (-1317193174, coalesce(false, false)), (-41, coalesce(true, false)), (1929066636, coalesce(false, true)); - -insert into t_vkx4cc (c_ylzjpt, c_hqfr9) values (-1700730666, coalesce((NOT NOT(cast( (-2022515471055597472 AND -29) as Nullable(Bool)))), false)), (1664059402, coalesce((NOT NOT(cast( (-19643 >= -122) as Nullable(Bool)))), false)), (1688303275, coalesce((NOT NOT(cast( (737275892 < 105) as Nullable(Bool)))), true)), (406615258, coalesce((NOT NOT(cast( (-657730213 = 82.86) as Nullable(Bool)))), false)); - -insert into t_vkx4cc (c_ylzjpt, c_hqfr9) values (-2, coalesce(false, false)), (1962246186, coalesce(true, false)), (-991301833, coalesce(true, true)), (2054878592, coalesce(false, false)); - -insert into t_vkx4cc (c_ylzjpt, c_hqfr9) values (643897141, coalesce((NOT NOT(cast( (-60 AND cast(null as Nullable(Int64))) as Nullable(Bool)))), true)), (-2051538534, coalesce(((-1616816511 between 332225780 and -1883087387)) or ((-573375170 between -1427445977 and 615586748)), false)), (77089559, coalesce((NOT NOT(cast( ((true) and (true) != 925456787) as Nullable(Bool)))), false)), (1116921321, coalesce((0 is NULL), true)); - -insert into t_vkx4cc (c_ylzjpt, c_hqfr9) values (-816935497, coalesce(false, false)), (1207796283, coalesce((-129 between -5 and -5), false)), (-1365934326, coalesce(true, false)), (-1618912877, coalesce((NOT NOT(cast( (false >= 31833) as Nullable(Bool)))), false)); - -insert into t_vkx4cc (c_ylzjpt, c_hqfr9) values (-331834394, coalesce((NOT NOT(cast( (-63 <= -1822810052) as Nullable(Bool)))), true)), (-1020892864, coalesce((NOT NOT(cast( (40.31 <= 8146037365746019777) as Nullable(Bool)))), true)), (-1150980622, coalesce(((94019304 between -730556489 and 32)) and ((-956354236 is not NULL)), true)), (-1203382363, coalesce(true, true)); - -insert into t_vkx4cc (c_ylzjpt, c_hqfr9) values (-653505826, coalesce((true) or (true), false)), (-1975508531, coalesce(((-796885845 between 65536 and cast(null as Nullable(Int32)))) or ((NOT NOT(cast( (-7467729336434250795 < 100.20) as Nullable(Bool))))), false)), (-1465484835, coalesce(((NOT NOT(cast( (19209 <= 75.96) as Nullable(Bool))))) or (true), false)), (1968095908, coalesce((NOT NOT(cast( (-1309960412156062327 > 13102) as Nullable(Bool)))), true)); - -alter table t_vkx4cc add column c_zosphq2t1 Float64; - -insert into t_vkx4cc (c_ylzjpt, c_hqfr9, c_zosphq2t1) values (-153185515, coalesce((NOT NOT(cast( (1291639145 >= 30.22) as Nullable(Bool)))), false), -1.8), (-411038390, coalesce(((-762326135 between 16 and 177530758)) or (false), true), 26.34), (914629990, coalesce((-1125832977 is not NULL), true), 59.2), (541758331, coalesce(false, true), -255.1); - -insert into t_vkx4cc (c_ylzjpt, c_hqfr9, c_zosphq2t1) values (2125075305, coalesce(false, false), 55.36), (-1176267855, coalesce(true, true), 55.45), (1459407556, coalesce((true) and ((NOT NOT(cast( (95.96 != 65) as Nullable(Bool))))), true), 85.80), (-1098155311, coalesce(false, false), 2147483649.9); - -insert into t_vkx4cc (c_ylzjpt, c_hqfr9, c_zosphq2t1) values (6, coalesce((NOT NOT(cast( (1546334968 < -4) as Nullable(Bool)))), true), 57.42), (-5, coalesce((NOT NOT(cast( (59 AND 13) as Nullable(Bool)))), false), 65536.3), (100663045, coalesce((-1190355242 is not NULL), true), 73.80), (-451392958, coalesce((NOT NOT(cast( (false != -443845933) as Nullable(Bool)))), false), -4294967294.0); - -insert into t_vkx4cc (c_ylzjpt, c_hqfr9, c_zosphq2t1) values (561061873, coalesce(true, false), 12.17), (-526570556, coalesce(false, false), 64.73), (-1450619195, coalesce(true, true), 54.33), (-3, coalesce(true, true), 52.9); - -insert into t_vkx4cc (c_ylzjpt, c_hqfr9, c_zosphq2t1) values (-504713125, coalesce(false, true), 27.58), (897064234, coalesce((836516994 between cast(null as Nullable(Int32)) and -1832647080), true), 9223372036854775809.2), (65535, coalesce(true, true), 4294967297.5), (-599948807, coalesce((false) or ((NOT NOT(cast( (6.52 = 65.49) as Nullable(Bool))))), false), 256.5); - -insert into t_vkx4cc (c_ylzjpt, c_hqfr9, c_zosphq2t1) values (-1650266905, coalesce((NOT NOT(cast( (-83 = -218055084) as Nullable(Bool)))), true), 1.9), (-841067875, coalesce(false, true), -126.5), (15, coalesce(((NOT NOT(cast( (cast(null as Nullable(Decimal)) = cast(null as Nullable(Int32))) as Nullable(Bool))))) or (true), true), 33.65), (1913361922, coalesce((NOT NOT(cast( (false AND 0) as Nullable(Bool)))), false), 6.4); - -insert into t_vkx4cc (c_ylzjpt, c_hqfr9, c_zosphq2t1) values (1159852204, coalesce((-2057115045 is not NULL), false), 20.61), (-6, coalesce(true, true), 66.33), (-1154269118, coalesce(false, true), 8.89), (1258218855, coalesce(true, false), 19.80); - -insert into t_vkx4cc (c_ylzjpt, c_hqfr9, c_zosphq2t1) values (1603772265, coalesce(false, true), 57.87), (-176934810, coalesce(false, true), 128.8), (-1458338029, coalesce((NOT NOT(cast( (20908 != (NOT NOT(cast( (cast(null as Nullable(Decimal)) <= (true) or ((NOT NOT(cast( (973511022 <= -112) as Nullable(Bool)))))) as Nullable(Bool))))) as Nullable(Bool)))), true), 76.54), (-262516786, coalesce((cast(null as Nullable(Int32)) is NULL), false), 21.49); - -insert into t_vkx4cc (c_ylzjpt, c_hqfr9, c_zosphq2t1) values (-1197430632, coalesce(true, false), 45.40), (-685902265, coalesce((NOT NOT(cast( (cast(null as Nullable(Decimal)) < cast(null as Nullable(Decimal))) as Nullable(Bool)))), true), 5.55), (1936334332, coalesce((-1565552735 is not NULL), false), 26.28), (2030467062, coalesce((NOT NOT(cast( (127.3 != cast(null as Nullable(Int32))) as Nullable(Bool)))), true), 89.50); - -insert into t_vkx4cc (c_ylzjpt, c_hqfr9, c_zosphq2t1) values (720985423, coalesce((NOT NOT(cast( (-451448940 = cast(null as Nullable(Decimal))) as Nullable(Bool)))), false), 52.65), (-222873194, coalesce(((-20 between -1419620477 and 1616455043)) or ((25624502 between 1312431316 and 1757361651)), false), 127.2), (745669725, coalesce((NOT NOT(cast( ((NOT NOT(cast( (cast(null as Nullable(UInt64)) <= 42) as Nullable(Bool)))) >= 3233811255032796928) as Nullable(Bool)))), false), 7.74), (-74234560, coalesce((NOT NOT(cast( (cast(null as Nullable(Decimal)) >= cast(null as Nullable(Decimal))) as Nullable(Bool)))), true), 19.25); - -SELECT DISTINCT - count(ref_0.c_zosphq2t1) over (partition by ref_0.c_hqfr9 order by ref_0.c_ylzjpt, ref_0.c_hqfr9, ref_0.c_zosphq2t1) as c0, - ref_0.c_ylzjpt as c1 -FROM - t_vkx4cc as ref_0 - order by c0, c1; From 959ac9a768530cb9e2c7b013df37bfee000a1644 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Thu, 18 Jul 2024 13:16:35 +0200 Subject: [PATCH 18/23] ci: dump dmesg in case of OOM Without additional info it is unclear how to tune paralelism or maybe split some modules. Signed-off-by: Azat Khuzhin --- tests/ci/ci.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/ci/ci.py b/tests/ci/ci.py index cf285f4b97d..c5ad97088aa 100644 --- a/tests/ci/ci.py +++ b/tests/ci/ci.py @@ -1281,7 +1281,8 @@ def main() -> int: except ValueError: pass if Utils.is_killed_with_oom(): - print("WARNING: OOM while job execution") + print("WARNING: OOM while job execution:") + print(subprocess.run("sudo dmesg -T", check=False)) error = f"Out Of Memory, exit_code {job_report.exit_code}, after {int(job_report.duration)}s" else: error = f"Unknown, exit_code {job_report.exit_code}, after {int(job_report.duration)}s" From 69ad57a2a52d50510b4ec5beea08c0c599846859 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 19 Jul 2024 03:58:07 +0200 Subject: [PATCH 19/23] Update 03205_parallel_window_finctions_and_column_sparse_bug.sql --- .../03205_parallel_window_finctions_and_column_sparse_bug.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/03205_parallel_window_finctions_and_column_sparse_bug.sql b/tests/queries/0_stateless/03205_parallel_window_finctions_and_column_sparse_bug.sql index f2391e0d165..4cc54bb5ac2 100644 --- a/tests/queries/0_stateless/03205_parallel_window_finctions_and_column_sparse_bug.sql +++ b/tests/queries/0_stateless/03205_parallel_window_finctions_and_column_sparse_bug.sql @@ -6,7 +6,7 @@ insert into t values (1, 0); insert into t values (1, 1); insert into t values (1, 0)(1, 1); -SELECT d, c, row_number() over (partition by d order by c) as c8 FROM t qualify c8=1 order by d settings max_threads=2; +SELECT d, c, row_number() over (partition by d order by c) as c8 FROM t qualify c8=1 order by d settings max_threads=2, allow_experimental_analyzer = 1; SELECT '---'; SELECT d, c, row_number() over (partition by d order by c) as c8 FROM t order by d, c8 settings max_threads=2; SELECT '---'; From 3fb01ed2c9154d79cd9d23e3ae2e8708e86d0a34 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?= Date: Fri, 19 Jul 2024 12:10:28 +0000 Subject: [PATCH 20/23] Use nonexistent address to check connection error at table creation --- tests/integration/test_storage_rabbitmq/test.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/tests/integration/test_storage_rabbitmq/test.py b/tests/integration/test_storage_rabbitmq/test.py index 3240039ee81..c163f3f7aed 100644 --- a/tests/integration/test_storage_rabbitmq/test.py +++ b/tests/integration/test_storage_rabbitmq/test.py @@ -2220,13 +2220,11 @@ def test_rabbitmq_commit_on_block_write(rabbitmq_cluster): def test_rabbitmq_no_connection_at_startup_1(rabbitmq_cluster): - # no connection when table is initialized - rabbitmq_cluster.pause_container("rabbitmq1") - instance.query_and_get_error( + error = instance.query_and_get_error( """ CREATE TABLE test.cs (key UInt64, value UInt64) ENGINE = RabbitMQ - SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', + SETTINGS rabbitmq_host_port = 'no_connection_at_startup:5672', rabbitmq_exchange_name = 'cs', rabbitmq_format = 'JSONEachRow', rabbitmq_flush_interval_ms=1000, @@ -2234,7 +2232,7 @@ def test_rabbitmq_no_connection_at_startup_1(rabbitmq_cluster): rabbitmq_row_delimiter = '\\n'; """ ) - rabbitmq_cluster.unpause_container("rabbitmq1") + assert "CANNOT_CONNECT_RABBITMQ" in error def test_rabbitmq_no_connection_at_startup_2(rabbitmq_cluster): From f9b97aac84d6ab5f377261e5b57c2d6e8a432cef Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 20 Jul 2024 23:19:33 +0200 Subject: [PATCH 21/23] Fix bad test `02950_part_log_bytes_uncompressed` --- .../0_stateless/02950_part_log_bytes_uncompressed.sql | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02950_part_log_bytes_uncompressed.sql b/tests/queries/0_stateless/02950_part_log_bytes_uncompressed.sql index 248475ab84b..cfed02eaeeb 100644 --- a/tests/queries/0_stateless/02950_part_log_bytes_uncompressed.sql +++ b/tests/queries/0_stateless/02950_part_log_bytes_uncompressed.sql @@ -1,3 +1,6 @@ +-- Tags: no-random-merge-tree-settings, no-random-settings +-- Because we compare part sizes, and they could be affected by index granularity and index compression settings. + CREATE TABLE part_log_bytes_uncompressed ( key UInt8, value UInt8 @@ -17,7 +20,8 @@ ALTER TABLE part_log_bytes_uncompressed DROP PART 'all_4_4_0' SETTINGS mutations SYSTEM FLUSH LOGS; -SELECT event_type, table, part_name, bytes_uncompressed > 0, size_in_bytes < bytes_uncompressed FROM system.part_log +SELECT event_type, table, part_name, bytes_uncompressed > 0, size_in_bytes < bytes_uncompressed ? '1' : toString((size_in_bytes, bytes_uncompressed)) +FROM system.part_log WHERE event_date >= yesterday() AND database = currentDatabase() AND table = 'part_log_bytes_uncompressed' AND (event_type != 'RemovePart' OR part_name = 'all_4_4_0') -- ignore removal of other parts ORDER BY part_name, event_type; From 67f51153661f37370f4e365ed3f19c6a4b84c2f4 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 21 Jul 2024 04:16:31 +0200 Subject: [PATCH 22/23] Update 02950_part_log_bytes_uncompressed.sql --- tests/queries/0_stateless/02950_part_log_bytes_uncompressed.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02950_part_log_bytes_uncompressed.sql b/tests/queries/0_stateless/02950_part_log_bytes_uncompressed.sql index cfed02eaeeb..24425062116 100644 --- a/tests/queries/0_stateless/02950_part_log_bytes_uncompressed.sql +++ b/tests/queries/0_stateless/02950_part_log_bytes_uncompressed.sql @@ -20,7 +20,7 @@ ALTER TABLE part_log_bytes_uncompressed DROP PART 'all_4_4_0' SETTINGS mutations SYSTEM FLUSH LOGS; -SELECT event_type, table, part_name, bytes_uncompressed > 0, size_in_bytes < bytes_uncompressed ? '1' : toString((size_in_bytes, bytes_uncompressed)) +SELECT event_type, table, part_name, bytes_uncompressed > 0, (bytes_uncompressed > 0 ? (size_in_bytes < bytes_uncompressed ? '1' : toString((size_in_bytes, bytes_uncompressed))) : '0') FROM system.part_log WHERE event_date >= yesterday() AND database = currentDatabase() AND table = 'part_log_bytes_uncompressed' AND (event_type != 'RemovePart' OR part_name = 'all_4_4_0') -- ignore removal of other parts From 0c2c027af63fcbababffbe3a39ed2631884e1938 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 21 Jul 2024 12:30:20 +0200 Subject: [PATCH 23/23] Remove bad tests @azat --- ...2_part_log_rmt_fetch_merge_error.reference | 10 ----- .../03002_part_log_rmt_fetch_merge_error.sql | 35 ---------------- ..._part_log_rmt_fetch_mutate_error.reference | 10 ----- .../03002_part_log_rmt_fetch_mutate_error.sql | 41 ------------------- 4 files changed, 96 deletions(-) delete mode 100644 tests/queries/0_stateless/03002_part_log_rmt_fetch_merge_error.reference delete mode 100644 tests/queries/0_stateless/03002_part_log_rmt_fetch_merge_error.sql delete mode 100644 tests/queries/0_stateless/03002_part_log_rmt_fetch_mutate_error.reference delete mode 100644 tests/queries/0_stateless/03002_part_log_rmt_fetch_mutate_error.sql diff --git a/tests/queries/0_stateless/03002_part_log_rmt_fetch_merge_error.reference b/tests/queries/0_stateless/03002_part_log_rmt_fetch_merge_error.reference deleted file mode 100644 index b19d389d8d0..00000000000 --- a/tests/queries/0_stateless/03002_part_log_rmt_fetch_merge_error.reference +++ /dev/null @@ -1,10 +0,0 @@ -before -rmt_master NewPart 0 1 -rmt_master MergeParts 0 1 -rmt_slave MergeParts 1 0 -rmt_slave DownloadPart 0 1 -after -rmt_master NewPart 0 1 -rmt_master MergeParts 0 1 -rmt_slave MergeParts 1 0 -rmt_slave DownloadPart 0 2 diff --git a/tests/queries/0_stateless/03002_part_log_rmt_fetch_merge_error.sql b/tests/queries/0_stateless/03002_part_log_rmt_fetch_merge_error.sql deleted file mode 100644 index 548a8e5570a..00000000000 --- a/tests/queries/0_stateless/03002_part_log_rmt_fetch_merge_error.sql +++ /dev/null @@ -1,35 +0,0 @@ --- Tags: no-replicated-database, no-parallel, no-shared-merge-tree --- SMT: The merge process is completely different from RMT - -drop table if exists rmt_master; -drop table if exists rmt_slave; - -create table rmt_master (key Int) engine=ReplicatedMergeTree('/clickhouse/{database}', 'master') order by key settings always_fetch_merged_part=0; --- always_fetch_merged_part=1, consider this table as a "slave" -create table rmt_slave (key Int) engine=ReplicatedMergeTree('/clickhouse/{database}', 'slave') order by key settings always_fetch_merged_part=1; - -insert into rmt_master values (1); - -system sync replica rmt_master; -system sync replica rmt_slave; -system stop replicated sends rmt_master; -optimize table rmt_master final settings alter_sync=1, optimize_throw_if_noop=1; - -select sleep(3) format Null; - -system flush logs; -select 'before'; -select table, event_type, error>0, countIf(error=0) from system.part_log where database = currentDatabase() group by 1, 2, 3 order by 1, 2, 3; - -system start replicated sends rmt_master; --- sleep few seconds to try rmt_slave to fetch the part and reflect this error --- in system.part_log -select sleep(3) format Null; -system sync replica rmt_slave; - -system flush logs; -select 'after'; -select table, event_type, error>0, countIf(error=0) from system.part_log where database = currentDatabase() group by 1, 2, 3 order by 1, 2, 3; - -drop table rmt_master; -drop table rmt_slave; diff --git a/tests/queries/0_stateless/03002_part_log_rmt_fetch_mutate_error.reference b/tests/queries/0_stateless/03002_part_log_rmt_fetch_mutate_error.reference deleted file mode 100644 index aac9e7527d1..00000000000 --- a/tests/queries/0_stateless/03002_part_log_rmt_fetch_mutate_error.reference +++ /dev/null @@ -1,10 +0,0 @@ -before -rmt_master NewPart 0 1 -rmt_master MutatePart 0 1 -rmt_slave DownloadPart 0 1 -rmt_slave MutatePart 1 0 -after -rmt_master NewPart 0 1 -rmt_master MutatePart 0 1 -rmt_slave DownloadPart 0 2 -rmt_slave MutatePart 1 0 diff --git a/tests/queries/0_stateless/03002_part_log_rmt_fetch_mutate_error.sql b/tests/queries/0_stateless/03002_part_log_rmt_fetch_mutate_error.sql deleted file mode 100644 index d8b5ebb3148..00000000000 --- a/tests/queries/0_stateless/03002_part_log_rmt_fetch_mutate_error.sql +++ /dev/null @@ -1,41 +0,0 @@ --- Tags: no-replicated-database, no-parallel, no-shared-merge-tree --- SMT: The merge process is completely different from RMT - -drop table if exists rmt_master; -drop table if exists rmt_slave; - -create table rmt_master (key Int) engine=ReplicatedMergeTree('/clickhouse/{database}', 'master') order by tuple() settings always_fetch_merged_part=0, old_parts_lifetime=600; --- prefer_fetch_merged_part_*_threshold=0, consider this table as a "slave" -create table rmt_slave (key Int) engine=ReplicatedMergeTree('/clickhouse/{database}', 'slave') order by tuple() settings prefer_fetch_merged_part_time_threshold=0, prefer_fetch_merged_part_size_threshold=0, old_parts_lifetime=600; - -insert into rmt_master values (1); - -system sync replica rmt_master; -system sync replica rmt_slave; -system stop replicated sends rmt_master; -system stop pulling replication log rmt_slave; -alter table rmt_master update key=key+100 where 1 settings alter_sync=1; - --- first we need to make the rmt_master execute mutation so that it will have --- the part, and rmt_slave will consider it instead of performing mutation on --- it's own, otherwise prefer_fetch_merged_part_*_threshold will be simply ignored -select sleep(3) format Null; -system start pulling replication log rmt_slave; --- and sleep few more seconds to try rmt_slave to fetch the part and reflect --- this error in system.part_log -select sleep(3) format Null; - -system flush logs; -select 'before'; -select table, event_type, error>0, countIf(error=0) from system.part_log where database = currentDatabase() group by 1, 2, 3 order by 1, 2, 3; - -system start replicated sends rmt_master; -select sleep(3) format Null; -system sync replica rmt_slave; - -system flush logs; -select 'after'; -select table, event_type, error>0, countIf(error=0) from system.part_log where database = currentDatabase() group by 1, 2, 3 order by 1, 2, 3; - -drop table rmt_master; -drop table rmt_slave;