From 413e190916303bbb1ddf8556ef15d27e0f8a0354 Mon Sep 17 00:00:00 2001
From: Anton Popov
Date: Wed, 10 Jan 2024 15:10:39 +0000
Subject: [PATCH 001/140] enable optimize_functions_to_subcolumns by default

---
 src/Core/Settings.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Core/Settings.h b/src/Core/Settings.h
index 0e6da579b10..5200e9f775a 100644
--- a/src/Core/Settings.h
+++ b/src/Core/Settings.h
@@ -580,7 +580,7 @@ class IColumn;
     M(Bool, optimize_multiif_to_if, true, "Replace 'multiIf' with only one condition to 'if'.", 0) \
     M(Bool, optimize_if_transform_strings_to_enum, false, "Replaces string-type arguments in If and Transform to enum. Disabled by default cause it could make inconsistent change in distributed query that would lead to its fail.", 0) \
     M(Bool, optimize_monotonous_functions_in_order_by, false, "Replace monotonous function with its argument in ORDER BY", 0) \
-    M(Bool, optimize_functions_to_subcolumns, false, "Transform functions to subcolumns, if possible, to reduce amount of read data. E.g. 'length(arr)' -> 'arr.size0', 'col IS NULL' -> 'col.null' ", 0) \
+    M(Bool, optimize_functions_to_subcolumns, true, "Transform functions to subcolumns, if possible, to reduce amount of read data. E.g. 'length(arr)' -> 'arr.size0', 'col IS NULL' -> 'col.null' ", 0) \
     M(Bool, optimize_using_constraints, false, "Use constraints for query optimization", 0) \
     M(Bool, optimize_substitute_columns, false, "Use constraints for column substitution", 0) \
     M(Bool, optimize_append_index, false, "Use constraints in order to append index condition (indexHint)", 0) \

From c1fc12fd35ab3a41edceb68ec3679bed11a69577 Mon Sep 17 00:00:00 2001
From: Anton Popov
Date: Wed, 10 Jan 2024 22:51:06 +0000
Subject: [PATCH 002/140] some fixes for optimize_functions_to_subcolumns

---
 .../Passes/FunctionToSubcolumnsPass.cpp       | 223 +++++++++++++++---
 .../RewriteFunctionToSubcolumnVisitor.cpp     |   2 +-
 .../RewriteFunctionToSubcolumnVisitor.h       |   1 +
 src/Interpreters/TreeOptimizer.cpp            |  41 +++-
 4 files changed, 231 insertions(+), 36 deletions(-)

diff --git a/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp b/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp
index cd635f87e0e..932e715a935 100644
--- a/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp
+++ b/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp
@@ -23,48 +23,194 @@ namespace DB
 namespace
 {
 
-class FunctionToSubcolumnsVisitor : public InDepthQueryTreeVisitorWithContext<FunctionToSubcolumnsVisitor>
+std::tuple<FunctionNode *, ColumnNode *, TableNode *> getTypedNodesForOptimization(const QueryTreeNodePtr & node)
+{
+    auto * function_node = node->as<FunctionNode>();
+    if (!function_node)
+        return {};
+
+    auto & function_arguments_nodes = function_node->getArguments().getNodes();
+    if (function_arguments_nodes.empty() || function_arguments_nodes.size() > 2)
+        return {};
+    auto * first_argument_column_node = function_arguments_nodes.front()->as<ColumnNode>();
+    if (!first_argument_column_node)
+        return {};
+
+    auto column_source = first_argument_column_node->getColumnSource();
+    auto * table_node = column_source->as<TableNode>();
+    if (!table_node)
+        return {};
+
+    if (!table_node->getStorageSnapshot())
+        return {};
+
+    if (!table_node->getStorage()->supportsSubcolumns())
+        return {};
+
+    return std::make_tuple(function_node, first_argument_column_node, table_node);
+}
+
+class FunctionToSubcolumnsVisitorFirstPass : public InDepthQueryTreeVisitorWithContext<FunctionToSubcolumnsVisitorFirstPass>
 {
 public:
-    using Base = InDepthQueryTreeVisitorWithContext<FunctionToSubcolumnsVisitor>;
+    using Base = InDepthQueryTreeVisitorWithContext<FunctionToSubcolumnsVisitorFirstPass>;
    using Base::Base;
 
-    void enterImpl(QueryTreeNodePtr & node) const
+    struct Data
     {
-        if 
(!getSettings().optimize_functions_to_subcolumns) + std::unordered_set all_key_columns; + std::unordered_map indentifiers_count; + std::unordered_map optimized_identifiers_count; + }; + + Data getData() const { return data; } + + void enterImpl(const QueryTreeNodePtr & node) + { + if (auto * column_node = node->as()) + { + enterImpl(*column_node); return; + } - auto * function_node = node->as(); - if (!function_node) + auto [function_node, first_argument_node, table_node] = getTypedNodesForOptimization(node); + if (function_node && first_argument_node && table_node) + { + enterImpl(*function_node, *first_argument_node, *table_node); return; + } + } - auto & function_arguments_nodes = function_node->getArguments().getNodes(); - size_t function_arguments_nodes_size = function_arguments_nodes.size(); - - if (function_arguments_nodes.empty() || function_arguments_nodes_size > 2) - return; - - auto * first_argument_column_node = function_arguments_nodes.front()->as(); - - if (!first_argument_column_node) - return; - - auto column_source = first_argument_column_node->getColumnSource(); +private: + void enterImpl(const ColumnNode & column_node) + { + auto column_source = column_node.getColumnSource(); auto * table_node = column_source->as(); - if (!table_node) return; - const auto & storage = table_node->getStorage(); - if (!storage->supportsSubcolumns()) - return; + auto table_name = table_node->getStorage()->getStorageID().getFullTableName(); + Identifier qualified_name({table_name, column_node.getColumnName()}); - auto column = first_argument_column_node->getColumn(); + ++data.indentifiers_count[qualified_name]; + + if (processed_tables.emplace(table_name).second) + { + const auto & metadata_snapshot = table_node->getStorageSnapshot()->metadata; + + auto add_key_columns = [&](const auto & key_columns) + { + for (const auto & column_name : key_columns) + { + Identifier identifier({table_name, column_name}); + data.all_key_columns.insert(identifier); + } + }; + + /// Do not optimize index columns (primary, min-max, secondary), + /// because otherwise analysis of indexes may be broken. + /// TODO: handle subcolumns in index analysis. 
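+            ///
+            /// Illustrative example (hypothetical schema): for a table with
+            /// `ORDER BY length(arr)`, rewriting `length(arr)` to the subcolumn
+            /// `arr.size0` would leave the index analysis unable to match the
+            /// sorting key expression.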
+ + const auto & primary_key_columns = metadata_snapshot->getColumnsRequiredForPrimaryKey(); + add_key_columns(primary_key_columns); + + const auto & partition_key_columns = metadata_snapshot->getColumnsRequiredForPartitionKey(); + add_key_columns(partition_key_columns); + + for (const auto & index : metadata_snapshot->getSecondaryIndices()) + { + const auto & index_columns = index.expression->getRequiredColumns(); + add_key_columns(index_columns); + } + } + } + + void enterImpl(const FunctionNode & function_node, const ColumnNode & first_argument_column_node, const TableNode & table_node) + { + const auto & function_arguments_nodes = function_node.getArguments().getNodes(); + const auto & function_name = function_node.getFunctionName(); + + auto column = first_argument_column_node.getColumn(); WhichDataType column_type(column.type); + auto table_name = table_node.getStorage()->getStorageID().getFullTableName(); + Identifier qualified_name({table_name, column.name}); + + if (function_arguments_nodes.size() == 1) + { + if (column_type.isArray()) + { + if (function_name == "length" || function_name == "empty" || function_name == "notEmpty") + ++data.optimized_identifiers_count[qualified_name]; + } + else if (column_type.isNullable()) + { + if (function_name == "isNull" || function_name == "isNotNull") + ++data.optimized_identifiers_count[qualified_name]; + } + else if (column_type.isMap()) + { + if (function_name == "mapKeys" || function_name == "mapValues") + ++data.optimized_identifiers_count[qualified_name]; + } + } + else if (function_arguments_nodes.size() == 2) + { + const auto * second_argument_constant_node = function_arguments_nodes[1]->as(); + if (function_name == "tupleElement" && column_type.isTuple() && second_argument_constant_node) + { + /** Replace `tupleElement(tuple_argument, string_literal)`, `tupleElement(tuple_argument, integer_literal)` + * with `tuple_argument.column_name`. 
+ */ + const auto & tuple_element_constant_value = second_argument_constant_node->getValue(); + const auto & tuple_element_constant_value_type = tuple_element_constant_value.getType(); + + if (tuple_element_constant_value_type == Field::Types::String || tuple_element_constant_value_type == Field::Types::UInt64) + ++data.optimized_identifiers_count[qualified_name]; + } + else if (function_name == "mapContains" && column_type.isMap()) + { + ++data.optimized_identifiers_count[qualified_name]; + } + } + } + + Data data; + NameSet processed_tables; +}; + + +class FunctionToSubcolumnsVisitorSecondPass : public InDepthQueryTreeVisitorWithContext +{ +public: + using Base = InDepthQueryTreeVisitorWithContext; + using Base::Base; + + FunctionToSubcolumnsVisitorSecondPass(ContextPtr context_, std::unordered_set identifiers_to_optimize_) + : Base(std::move(context_)), identifiers_to_optimize(std::move(identifiers_to_optimize_)) + { + } + + void enterImpl(QueryTreeNodePtr & node) const + { + auto [function_node, first_argument_column_node, table_node] = getTypedNodesForOptimization(node); + if (!function_node || !first_argument_column_node || !table_node) + return; + + auto & function_arguments_nodes = function_node->getArguments().getNodes(); const auto & function_name = function_node->getFunctionName(); - if (function_arguments_nodes_size == 1) + auto column = first_argument_column_node->getColumn(); + auto column_source = first_argument_column_node->getColumnSource(); + WhichDataType column_type(column.type); + + auto table_name = table_node->getStorage()->getStorageID().getFullTableName(); + Identifier qualified_name({table_name, column.name}); + + if (!identifiers_to_optimize.contains(qualified_name)) + return; + + if (function_arguments_nodes.size() == 1) { if (column_type.isArray()) { @@ -72,7 +218,6 @@ public: { /// Replace `length(array_argument)` with `array_argument.size0` column.name += ".size0"; - node = std::make_shared(column, column_source); } else if (function_name == "empty") @@ -106,7 +251,6 @@ public: { /// Replace `isNull(nullable_argument)` with `nullable_argument.null` column.name += ".null"; - node = std::make_shared(column, column_source); } else if (function_name == "isNotNull") @@ -140,10 +284,9 @@ public: } } } - else + else if (function_arguments_nodes.size() == 2) { const auto * second_argument_constant_node = function_arguments_nodes[1]->as(); - if (function_name == "tupleElement" && column_type.isTuple() && second_argument_constant_node) { /** Replace `tupleElement(tuple_argument, string_literal)`, `tupleElement(tuple_argument, integer_literal)` @@ -193,6 +336,8 @@ public: } private: + std::unordered_set identifiers_to_optimize; + inline void resolveOrdinaryFunctionNode(FunctionNode & function_node, const String & function_name) const { auto function = FunctionFactory::instance().get(function_name, getContext()); @@ -204,8 +349,26 @@ private: void FunctionToSubcolumnsPass::run(QueryTreeNodePtr query_tree_node, ContextPtr context) { - FunctionToSubcolumnsVisitor visitor(context); - visitor.visit(query_tree_node); + if (!context->getSettingsRef().optimize_functions_to_subcolumns) + return; + + std::unordered_set identifiers_to_optimize; + + { + FunctionToSubcolumnsVisitorFirstPass visitor(context); + visitor.visit(query_tree_node); + + auto data = visitor.getData(); + for (const auto & [identifier, count] : data.optimized_identifiers_count) + if (!data.all_key_columns.contains(identifier) && data.indentifiers_count[identifier] == count) + 
identifiers_to_optimize.insert(identifier); + } + + if (!identifiers_to_optimize.empty()) + { + FunctionToSubcolumnsVisitorSecondPass visitor(std::move(context), std::move(identifiers_to_optimize)); + visitor.visit(query_tree_node); + } } } diff --git a/src/Interpreters/RewriteFunctionToSubcolumnVisitor.cpp b/src/Interpreters/RewriteFunctionToSubcolumnVisitor.cpp index 506fa13b7ba..5747ce5a3a1 100644 --- a/src/Interpreters/RewriteFunctionToSubcolumnVisitor.cpp +++ b/src/Interpreters/RewriteFunctionToSubcolumnVisitor.cpp @@ -78,7 +78,7 @@ void RewriteFunctionToSubcolumnData::visit(ASTFunction & function, ASTPtr & ast) const auto & columns = metadata_snapshot->getColumns(); const auto & name_in_storage = identifier->name(); - if (!columns.has(name_in_storage)) + if (!columns.has(name_in_storage) || forbidden_identifiers.contains(name_in_storage)) return; const auto & column_type = columns.get(name_in_storage).type; diff --git a/src/Interpreters/RewriteFunctionToSubcolumnVisitor.h b/src/Interpreters/RewriteFunctionToSubcolumnVisitor.h index 4d064bdee10..3c945da92ec 100644 --- a/src/Interpreters/RewriteFunctionToSubcolumnVisitor.h +++ b/src/Interpreters/RewriteFunctionToSubcolumnVisitor.h @@ -17,6 +17,7 @@ public: void visit(ASTFunction & function, ASTPtr & ast) const; StorageMetadataPtr metadata_snapshot; + IdentifierNameSet forbidden_identifiers; }; using RewriteFunctionToSubcolumnMatcher = OneTypeMatcher; diff --git a/src/Interpreters/TreeOptimizer.cpp b/src/Interpreters/TreeOptimizer.cpp index 729e2ed6007..24599ed0044 100644 --- a/src/Interpreters/TreeOptimizer.cpp +++ b/src/Interpreters/TreeOptimizer.cpp @@ -656,10 +656,41 @@ void transformIfStringsIntoEnum(ASTPtr & query) ConvertStringsToEnumVisitor(convert_data).visit(query); } -void optimizeFunctionsToSubcolumns(ASTPtr & query, const StorageMetadataPtr & metadata_snapshot) +void optimizeFunctionsToSubcolumns(ASTPtr & query, const TreeRewriterResult & result) { - RewriteFunctionToSubcolumnVisitor::Data data{metadata_snapshot}; - RewriteFunctionToSubcolumnVisitor(data).visit(query); + if (!result.storage || !result.storage->supportsSubcolumns() || !result.storage_snapshot) + return; + + const auto & metadata_snapshot = result.storage_snapshot->metadata; + // const auto & select_query = assert_cast(*query); + + /// For queries with FINAL converting function to subcolumn may alter + /// special merging algorithms and produce wrong result of query. + // if (select_query.final()) + // return; + + // FindIdentifiersForbiddenToReplaceToSubcolumnsVisitor::Data data; + // FindIdentifiersForbiddenToReplaceToSubcolumnsVisitor(data).visit(query); + + IdentifierNameSet forbidden_identifiers; + + /// Do not optimize index columns (primary, min-max, secondary), + /// because otherwise analysis of indexes may be broken. + /// TODO: handle subcolumns in index analysis. 
+ const auto & primary_key_columns = result.storage_snapshot->metadata->getColumnsRequiredForPrimaryKey(); + forbidden_identifiers.insert(primary_key_columns.begin(), primary_key_columns.end()); + + const auto & partition_key_columns = metadata_snapshot->getColumnsRequiredForPartitionKey(); + forbidden_identifiers.insert(partition_key_columns.begin(), partition_key_columns.end()); + + for (const auto & index : metadata_snapshot->getSecondaryIndices()) + { + const auto & index_columns = index.expression->getRequiredColumns(); + forbidden_identifiers.insert(index_columns.begin(), index_columns.end()); + } + + RewriteFunctionToSubcolumnVisitor::Data rewrite_data{metadata_snapshot, forbidden_identifiers}; + RewriteFunctionToSubcolumnVisitor(rewrite_data).visit(query); } void optimizeOrLikeChain(ASTPtr & query) @@ -726,8 +757,8 @@ void TreeOptimizer::apply(ASTPtr & query, TreeRewriterResult & result, if (!select_query) throw Exception(ErrorCodes::LOGICAL_ERROR, "Select analyze for not select asts."); - if (settings.optimize_functions_to_subcolumns && result.storage_snapshot && result.storage->supportsSubcolumns()) - optimizeFunctionsToSubcolumns(query, result.storage_snapshot->metadata); + if (settings.optimize_functions_to_subcolumns) + optimizeFunctionsToSubcolumns(query, result); /// Move arithmetic operations out of aggregation functions if (settings.optimize_arithmetic_operations_in_aggregate_functions) From f79202bd532594cd441be928a11691a51fe88e65 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Thu, 11 Jan 2024 16:12:41 +0000 Subject: [PATCH 003/140] some fixes for optimize_functions_to_subcolumns --- .../Passes/FunctionToSubcolumnsPass.cpp | 57 ++++++++++++------- 1 file changed, 35 insertions(+), 22 deletions(-) diff --git a/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp b/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp index 932e715a935..d901692ba27 100644 --- a/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp +++ b/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp @@ -16,6 +16,7 @@ #include #include #include +#include namespace DB { @@ -32,19 +33,14 @@ std::tuple getTypedNodesForOptimizati auto & function_arguments_nodes = function_node->getArguments().getNodes(); if (function_arguments_nodes.empty() || function_arguments_nodes.size() > 2) return {}; + auto * first_argument_column_node = function_arguments_nodes.front()->as(); if (!first_argument_column_node) return {}; auto column_source = first_argument_column_node->getColumnSource(); auto * table_node = column_source->as(); - if (!table_node) - return {}; - - if (!table_node->getStorageSnapshot()) - return {}; - - if (!table_node->getStorage()->supportsSubcolumns()) + if (!table_node || !table_node->getStorageSnapshot() || !table_node->getStorage()->supportsSubcolumns()) return {}; return std::make_tuple(function_node, first_argument_column_node, table_node); @@ -58,6 +54,7 @@ public: struct Data { + bool has_final = false; std::unordered_set all_key_columns; std::unordered_map indentifiers_count; std::unordered_map optimized_identifiers_count; @@ -67,12 +64,31 @@ public: void enterImpl(const QueryTreeNodePtr & node) { + if (data.has_final) + return; + if (auto * column_node = node->as()) { enterImpl(*column_node); return; } + if (auto * table_node = node->as()) + { + if (table_node->hasTableExpressionModifiers() + && table_node->getTableExpressionModifiers()->hasFinal()) + data.has_final = true; + return; + } + + if (auto * table_function_node = node->as()) + { + if (table_function_node->hasTableExpressionModifiers() + && 
table_function_node->getTableExpressionModifiers()->hasFinal()) + data.has_final = true; + return; + } + auto [function_node, first_argument_node, table_node] = getTypedNodesForOptimization(node); if (function_node && first_argument_node && table_node) { @@ -159,9 +175,6 @@ private: const auto * second_argument_constant_node = function_arguments_nodes[1]->as(); if (function_name == "tupleElement" && column_type.isTuple() && second_argument_constant_node) { - /** Replace `tupleElement(tuple_argument, string_literal)`, `tupleElement(tuple_argument, integer_literal)` - * with `tuple_argument.column_name`. - */ const auto & tuple_element_constant_value = second_argument_constant_node->getValue(); const auto & tuple_element_constant_value_type = tuple_element_constant_value.getType(); @@ -352,22 +365,22 @@ void FunctionToSubcolumnsPass::run(QueryTreeNodePtr query_tree_node, ContextPtr if (!context->getSettingsRef().optimize_functions_to_subcolumns) return; + FunctionToSubcolumnsVisitorFirstPass first_visitor(context); + first_visitor.visit(query_tree_node); + auto data = first_visitor.getData(); + + if (data.has_final) + return; + std::unordered_set identifiers_to_optimize; - - { - FunctionToSubcolumnsVisitorFirstPass visitor(context); - visitor.visit(query_tree_node); - - auto data = visitor.getData(); - for (const auto & [identifier, count] : data.optimized_identifiers_count) - if (!data.all_key_columns.contains(identifier) && data.indentifiers_count[identifier] == count) - identifiers_to_optimize.insert(identifier); - } + for (const auto & [identifier, count] : data.optimized_identifiers_count) + if (!data.all_key_columns.contains(identifier) && data.indentifiers_count[identifier] == count) + identifiers_to_optimize.insert(identifier); if (!identifiers_to_optimize.empty()) { - FunctionToSubcolumnsVisitorSecondPass visitor(std::move(context), std::move(identifiers_to_optimize)); - visitor.visit(query_tree_node); + FunctionToSubcolumnsVisitorSecondPass second_visitor(std::move(context), std::move(identifiers_to_optimize)); + second_visitor.visit(query_tree_node); } } From cb4c78af59abca8788dfbd880fa6b7042fca3e6c Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Thu, 11 Jan 2024 23:13:04 +0000 Subject: [PATCH 004/140] fix optimize_functions_to_subcolumns with old analyzer --- .../Passes/FunctionToSubcolumnsPass.cpp | 131 ++++++++++-------- src/DataTypes/DataTypeTuple.cpp | 4 +- .../RewriteFunctionToSubcolumnVisitor.cpp | 126 +++++++++++++---- .../RewriteFunctionToSubcolumnVisitor.h | 34 ++++- src/Interpreters/TreeOptimizer.cpp | 50 +++++-- .../02286_tuple_numeric_identifier.sql | 6 +- 6 files changed, 238 insertions(+), 113 deletions(-) diff --git a/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp b/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp index d901692ba27..82d50f5fdb1 100644 --- a/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp +++ b/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp @@ -40,7 +40,7 @@ std::tuple getTypedNodesForOptimizati auto column_source = first_argument_column_node->getColumnSource(); auto * table_node = column_source->as(); - if (!table_node || !table_node->getStorageSnapshot() || !table_node->getStorage()->supportsSubcolumns()) + if (!table_node || !table_node->getStorage()->supportsSubcolumns()) return {}; return std::make_tuple(function_node, first_argument_column_node, table_node); @@ -67,28 +67,18 @@ public: if (data.has_final) return; + if (auto * table_node = node->as()) + { + enterImpl(*table_node); + return; + } + if (auto * column_node = node->as()) { 
enterImpl(*column_node);
             return;
         }
 
-        if (auto * table_node = node->as<TableNode>())
-        {
-            if (table_node->hasTableExpressionModifiers()
-                && table_node->getTableExpressionModifiers()->hasFinal())
-                data.has_final = true;
-            return;
-        }
-
-        if (auto * table_function_node = node->as<TableFunctionNode>())
-        {
-            if (table_function_node->hasTableExpressionModifiers()
-                && table_function_node->getTableExpressionModifiers()->hasFinal())
-                data.has_final = true;
-            return;
-        }
-
         auto [function_node, first_argument_node, table_node] = getTypedNodesForOptimization(node);
         if (function_node && first_argument_node && table_node)
         {
@@ -98,6 +88,45 @@ public:
     }
 
 private:
+    Data data;
+    NameSet processed_tables;
+
+    void enterImpl(const TableNode & table_node)
+    {
+        if (table_node.hasTableExpressionModifiers() && table_node.getTableExpressionModifiers()->hasFinal())
+        {
+            data.has_final = true;
+            return;
+        }
+
+        auto table_name = table_node.getStorage()->getStorageID().getFullTableName();
+        /// Collect the key columns only on the first visit of each table;
+        /// emplace() returns false in `second` when the table was seen before.
+        if (!processed_tables.emplace(table_name).second)
+            return;
+
+        auto add_key_columns = [&](const auto & key_columns)
+        {
+            for (const auto & column_name : key_columns)
+            {
+                Identifier identifier({table_name, column_name});
+                data.all_key_columns.insert(identifier);
+            }
+        };
+
+        const auto & metadata_snapshot = table_node.getStorageSnapshot()->metadata;
+
+        const auto & primary_key_columns = metadata_snapshot->getColumnsRequiredForPrimaryKey();
+        add_key_columns(primary_key_columns);
+
+        const auto & partition_key_columns = metadata_snapshot->getColumnsRequiredForPartitionKey();
+        add_key_columns(partition_key_columns);
+
+        for (const auto & index : metadata_snapshot->getSecondaryIndices())
+        {
+            const auto & index_columns = index.expression->getRequiredColumns();
+            add_key_columns(index_columns);
+        }
+    }
+
     void enterImpl(const ColumnNode & column_node)
     {
         auto column_source = column_node.getColumnSource();
@@ -109,36 +138,6 @@ private:
         Identifier qualified_name({table_name, column_node.getColumnName()});
 
         ++data.indentifiers_count[qualified_name];
-
-        if (processed_tables.emplace(table_name).second)
-        {
-            const auto & metadata_snapshot = table_node->getStorageSnapshot()->metadata;
-
-            auto add_key_columns = [&](const auto & key_columns)
-            {
-                for (const auto & column_name : key_columns)
-                {
-                    Identifier identifier({table_name, column_name});
-                    data.all_key_columns.insert(identifier);
-                }
-            };
-
-            /// Do not optimize index columns (primary, min-max, secondary),
-            /// because otherwise analysis of indexes may be broken.
-            /// TODO: handle subcolumns in index analysis. 
- - const auto & primary_key_columns = metadata_snapshot->getColumnsRequiredForPrimaryKey(); - add_key_columns(primary_key_columns); - - const auto & partition_key_columns = metadata_snapshot->getColumnsRequiredForPartitionKey(); - add_key_columns(partition_key_columns); - - for (const auto & index : metadata_snapshot->getSecondaryIndices()) - { - const auto & index_columns = index.expression->getRequiredColumns(); - add_key_columns(index_columns); - } - } } void enterImpl(const FunctionNode & function_node, const ColumnNode & first_argument_column_node, const TableNode & table_node) @@ -187,9 +186,6 @@ private: } } } - - Data data; - NameSet processed_tables; }; @@ -214,15 +210,15 @@ public: const auto & function_name = function_node->getFunctionName(); auto column = first_argument_column_node->getColumn(); - auto column_source = first_argument_column_node->getColumnSource(); - WhichDataType column_type(column.type); - auto table_name = table_node->getStorage()->getStorageID().getFullTableName(); - Identifier qualified_name({table_name, column.name}); + Identifier qualified_name({table_name, column.name}); if (!identifiers_to_optimize.contains(qualified_name)) return; + auto column_source = first_argument_column_node->getColumnSource(); + WhichDataType column_type(column.type); + if (function_arguments_nodes.size() == 1) { if (column_type.isArray()) @@ -369,19 +365,36 @@ void FunctionToSubcolumnsPass::run(QueryTreeNodePtr query_tree_node, ContextPtr first_visitor.visit(query_tree_node); auto data = first_visitor.getData(); + /// For queries with FINAL converting function to subcolumn may alter + /// special merging algorithms and produce wrong result of query. if (data.has_final) return; + /// Do not optimize if full column is requested in other context. + /// It doesn't make sense because it doesn't reduce amount of read data + /// and optimized functions are not computation heavy. But introducing + /// new identifier complicates query analysis and may break it. + /// + /// E.g. query: + /// SELECT n FROM table GROUP BY n HAVING isNotNull(n) + /// may be optimized to incorrect query: + /// SELECT n FROM table GROUP BY n HAVING not(n.null) + /// Will produce: `n.null` is not under aggregate function and not in GROUP BY keys) + /// + /// Do not optimize index columns (primary, min-max, secondary), + /// because otherwise analysis of indexes may be broken. + /// TODO: handle subcolumns in index analysis. 
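+    ///
+    /// The criterion below rewrites a column only when every occurrence of it
+    /// is optimizable. E.g. in a hypothetical `SELECT isNull(n), n FROM t`,
+    /// the column `n` occurs twice but only one occurrence can be rewritten,
+    /// so the counts differ and `n` is left as is.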
+ std::unordered_set identifiers_to_optimize; for (const auto & [identifier, count] : data.optimized_identifiers_count) if (!data.all_key_columns.contains(identifier) && data.indentifiers_count[identifier] == count) identifiers_to_optimize.insert(identifier); - if (!identifiers_to_optimize.empty()) - { - FunctionToSubcolumnsVisitorSecondPass second_visitor(std::move(context), std::move(identifiers_to_optimize)); - second_visitor.visit(query_tree_node); - } + if (identifiers_to_optimize.empty()) + return; + + FunctionToSubcolumnsVisitorSecondPass second_visitor(std::move(context), std::move(identifiers_to_optimize)); + second_visitor.visit(query_tree_node); } } diff --git a/src/DataTypes/DataTypeTuple.cpp b/src/DataTypes/DataTypeTuple.cpp index 9cce59b0dca..6edbb8b27eb 100644 --- a/src/DataTypes/DataTypeTuple.cpp +++ b/src/DataTypes/DataTypeTuple.cpp @@ -32,7 +32,7 @@ namespace ErrorCodes extern const int NOT_FOUND_COLUMN_IN_BLOCK; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; extern const int SIZES_OF_COLUMNS_IN_TUPLE_DOESNT_MATCH; - extern const int ILLEGAL_INDEX; + extern const int ARGUMENT_OUT_OF_BOUND; extern const int LOGICAL_ERROR; } @@ -270,7 +270,7 @@ std::optional DataTypeTuple::tryGetPositionByName(const String & name) c String DataTypeTuple::getNameByPosition(size_t i) const { if (i == 0 || i > names.size()) - throw Exception(ErrorCodes::ILLEGAL_INDEX, "Index of tuple element ({}) if out range ([1, {}])", i, names.size()); + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Index of tuple element ({}) is out range ([1, {}])", i, names.size()); return names[i - 1]; } diff --git a/src/Interpreters/RewriteFunctionToSubcolumnVisitor.cpp b/src/Interpreters/RewriteFunctionToSubcolumnVisitor.cpp index 5747ce5a3a1..2a235ae31e4 100644 --- a/src/Interpreters/RewriteFunctionToSubcolumnVisitor.cpp +++ b/src/Interpreters/RewriteFunctionToSubcolumnVisitor.cpp @@ -63,42 +63,104 @@ const std::unordered_map getColumnFromArgumentsToOptimize( + const ASTs & arguments, + const StorageMetadataPtr & metadata_snapshot) { - const auto & arguments = function.arguments->children; if (arguments.empty() || arguments.size() > 2) - return; + return {}; const auto * identifier = arguments[0]->as(); if (!identifier) - return; + return {}; const auto & columns = metadata_snapshot->getColumns(); const auto & name_in_storage = identifier->name(); - if (!columns.has(name_in_storage) || forbidden_identifiers.contains(name_in_storage)) - return; + if (!columns.has(name_in_storage)) + return {}; const auto & column_type = columns.get(name_in_storage).type; - TypeIndex column_type_id = column_type->getTypeId(); + return NameAndTypePair{name_in_storage, column_type}; +} + +} + +void RewriteFunctionToSubcolumnFirstPassMatcher::visit(const ASTPtr & ast, Data & data) +{ + if (const auto * identifier = ast->as()) + { + ++data.indentifiers_count[identifier->name()]; + return; + } + + if (const auto * function = ast->as()) + { + visit(*function, data); + return; + } +} + +void RewriteFunctionToSubcolumnFirstPassMatcher::visit(const ASTFunction & function, Data & data) +{ + const auto & arguments = function.arguments->children; + auto column = getColumnFromArgumentsToOptimize(arguments, data.metadata_snapshot); + if (!column) + return; + + auto column_type_id = column->type->getTypeId(); + + if (arguments.size() == 1) + { + auto it = unary_function_to_subcolumn.find(function.name); + if (it != unary_function_to_subcolumn.end() && std::get<0>(it->second) == column_type_id) + 
++data.optimized_identifiers_count[column->name]; + } + else + { + if (function.name == "tupleElement" && column_type_id == TypeIndex::Tuple) + { + const auto * literal = arguments[1]->as(); + if (!literal) + return; + + auto value_type = literal->value.getType(); + if (value_type == Field::Types::UInt64 || value_type == Field::Types::String) + ++data.optimized_identifiers_count[column->name]; + } + else + { + auto it = binary_function_to_subcolumn.find(function.name); + if (it != binary_function_to_subcolumn.end() && std::get<0>(it->second) == column_type_id) + ++data.optimized_identifiers_count[column->name]; + } + } +} + +void RewriteFunctionToSubcolumnSecondPassData::visit(ASTFunction & function, ASTPtr & ast) const +{ + const auto & arguments = function.arguments->children; + auto column = getColumnFromArgumentsToOptimize(arguments, metadata_snapshot); + if (!column) + return; + + auto column_type_id = column->type->getTypeId(); const auto & alias = function.tryGetAlias(); if (arguments.size() == 1) { auto it = unary_function_to_subcolumn.find(function.name); - if (it != unary_function_to_subcolumn.end()) - { - const auto & [type_id, subcolumn_name, transformer] = it->second; - if (column_type_id == type_id) - { - ast = transformer(name_in_storage, subcolumn_name); - ast->setAlias(alias); - } - } + if (it == unary_function_to_subcolumn.end()) + return; + + const auto & [expected_type_id, subcolumn_name, transformer] = it->second; + if (column_type_id != expected_type_id) + return; + + ast = transformer(column->name, subcolumn_name); + ast->setAlias(alias); } - else + else if (arguments.size() == 2) { if (function.name == "tupleElement" && column_type_id == TypeIndex::Tuple) { @@ -110,30 +172,34 @@ void RewriteFunctionToSubcolumnData::visit(ASTFunction & function, ASTPtr & ast) auto value_type = literal->value.getType(); if (value_type == Field::Types::UInt64) { - const auto & type_tuple = assert_cast(*column_type); + const auto & type_tuple = assert_cast(*column->type); auto index = literal->value.get(); subcolumn_name = type_tuple.getNameByPosition(index); } else if (value_type == Field::Types::String) + { subcolumn_name = literal->value.get(); + } else + { return; + } - ast = transformToSubcolumn(name_in_storage, subcolumn_name); + ast = transformToSubcolumn(column->name, subcolumn_name); ast->setAlias(alias); } else { auto it = binary_function_to_subcolumn.find(function.name); - if (it != binary_function_to_subcolumn.end()) - { - const auto & [type_id, subcolumn_name, transformer] = it->second; - if (column_type_id == type_id) - { - ast = transformer(name_in_storage, subcolumn_name, arguments[1]); - ast->setAlias(alias); - } - } + if (it == binary_function_to_subcolumn.end()) + return; + + const auto & [expected_type_id, subcolumn_name, transformer] = it->second; + if (column_type_id != expected_type_id) + return; + + ast = transformer(column->name, subcolumn_name, arguments[1]); + ast->setAlias(alias); } } } diff --git a/src/Interpreters/RewriteFunctionToSubcolumnVisitor.h b/src/Interpreters/RewriteFunctionToSubcolumnVisitor.h index 3c945da92ec..08eb6e27c52 100644 --- a/src/Interpreters/RewriteFunctionToSubcolumnVisitor.h +++ b/src/Interpreters/RewriteFunctionToSubcolumnVisitor.h @@ -7,20 +7,46 @@ namespace DB { class ASTFunction; +class ASTIdentifier; + +/// Collects info about identifiers to select columns to optimize to subcolumns. 
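+/// A column qualifies only if all of its occurrences can be rewritten and it is
+/// not used in any table key (see optimizeFunctionsToSubcolumns in TreeOptimizer.cpp).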
+class RewriteFunctionToSubcolumnFirstPassMatcher +{ +public: + struct Data + { + explicit Data(StorageMetadataPtr metadata_snapshot_) : metadata_snapshot(std::move(metadata_snapshot_)) {} + + StorageMetadataPtr metadata_snapshot; + std::unordered_map indentifiers_count; + std::unordered_map optimized_identifiers_count; + }; + + static void visit(const ASTPtr & ast, Data & data); + static void visit(const ASTFunction & function, Data & data); + static bool needChildVisit(ASTPtr & , ASTPtr &) { return true; } +}; + +using RewriteFunctionToSubcolumnFirstPassVisitor = InDepthNodeVisitor; /// Rewrites functions to subcolumns, if possible, to reduce amount of read data. /// E.g. 'length(arr)' -> 'arr.size0', 'col IS NULL' -> 'col.null' -class RewriteFunctionToSubcolumnData +class RewriteFunctionToSubcolumnSecondPassData { public: using TypeToVisit = ASTFunction; void visit(ASTFunction & function, ASTPtr & ast) const; + RewriteFunctionToSubcolumnSecondPassData(StorageMetadataPtr metadata_snapshot_, NameSet identifiers_to_optimize_) + : metadata_snapshot(std::move(metadata_snapshot_)), identifiers_to_optimize(std::move(identifiers_to_optimize_)) + { + } + StorageMetadataPtr metadata_snapshot; - IdentifierNameSet forbidden_identifiers; + NameSet identifiers_to_optimize; }; -using RewriteFunctionToSubcolumnMatcher = OneTypeMatcher; -using RewriteFunctionToSubcolumnVisitor = InDepthNodeVisitor; +using RewriteFunctionToSubcolumnSecondPassMatcher = OneTypeMatcher; +using RewriteFunctionToSubcolumnSecondPassVisitor = InDepthNodeVisitor; } diff --git a/src/Interpreters/TreeOptimizer.cpp b/src/Interpreters/TreeOptimizer.cpp index 3df6242fa85..07cfe897010 100644 --- a/src/Interpreters/TreeOptimizer.cpp +++ b/src/Interpreters/TreeOptimizer.cpp @@ -659,35 +659,55 @@ void optimizeFunctionsToSubcolumns(ASTPtr & query, const TreeRewriterResult & re return; const auto & metadata_snapshot = result.storage_snapshot->metadata; - // const auto & select_query = assert_cast(*query); + const auto & select_query = assert_cast(*query); /// For queries with FINAL converting function to subcolumn may alter /// special merging algorithms and produce wrong result of query. - // if (select_query.final()) - // return; + if (select_query.final()) + return; - // FindIdentifiersForbiddenToReplaceToSubcolumnsVisitor::Data data; - // FindIdentifiersForbiddenToReplaceToSubcolumnsVisitor(data).visit(query); + NameSet all_key_columns; - IdentifierNameSet forbidden_identifiers; - - /// Do not optimize index columns (primary, min-max, secondary), - /// because otherwise analysis of indexes may be broken. - /// TODO: handle subcolumns in index analysis. 
const auto & primary_key_columns = result.storage_snapshot->metadata->getColumnsRequiredForPrimaryKey(); - forbidden_identifiers.insert(primary_key_columns.begin(), primary_key_columns.end()); + all_key_columns.insert(primary_key_columns.begin(), primary_key_columns.end()); const auto & partition_key_columns = metadata_snapshot->getColumnsRequiredForPartitionKey(); - forbidden_identifiers.insert(partition_key_columns.begin(), partition_key_columns.end()); + all_key_columns.insert(partition_key_columns.begin(), partition_key_columns.end()); for (const auto & index : metadata_snapshot->getSecondaryIndices()) { const auto & index_columns = index.expression->getRequiredColumns(); - forbidden_identifiers.insert(index_columns.begin(), index_columns.end()); + all_key_columns.insert(index_columns.begin(), index_columns.end()); } - RewriteFunctionToSubcolumnVisitor::Data rewrite_data{metadata_snapshot, forbidden_identifiers}; - RewriteFunctionToSubcolumnVisitor(rewrite_data).visit(query); + /// Do not optimize if full column is requested in other context. + /// It doesn't make sense because it doesn't reduce amount of read data + /// and optimized functions are not computation heavy. But introducing + /// new identifier complicates query analysis and may break it. + /// + /// E.g. query: + /// SELECT n FROM table GROUP BY n HAVING isNotNull(n) + /// may be optimized to incorrect query: + /// SELECT n FROM table GROUP BY n HAVING not(n.null) + /// Will produce: `n.null` is not under aggregate function and not in GROUP BY keys) + /// + /// Do not optimize index columns (primary, min-max, secondary), + /// because otherwise analysis of indexes may be broken. + /// TODO: handle subcolumns in index analysis. + + RewriteFunctionToSubcolumnFirstPassVisitor::Data data(metadata_snapshot); + RewriteFunctionToSubcolumnFirstPassVisitor(data).visit(query); + + NameSet identifiers_to_optimize; + for (const auto & [identifier, count] : data.optimized_identifiers_count) + if (!all_key_columns.contains(identifier) && data.indentifiers_count[identifier] == count) + identifiers_to_optimize.insert(identifier); + + if (identifiers_to_optimize.empty()) + return; + + RewriteFunctionToSubcolumnSecondPassVisitor::Data rewrite_data(metadata_snapshot, identifiers_to_optimize); + RewriteFunctionToSubcolumnSecondPassVisitor(rewrite_data).visit(query); } void optimizeOrLikeChain(ASTPtr & query) diff --git a/tests/queries/0_stateless/02286_tuple_numeric_identifier.sql b/tests/queries/0_stateless/02286_tuple_numeric_identifier.sql index f723284ad61..151ff275f7b 100644 --- a/tests/queries/0_stateless/02286_tuple_numeric_identifier.sql +++ b/tests/queries/0_stateless/02286_tuple_numeric_identifier.sql @@ -12,9 +12,9 @@ SELECT * FROM t_tuple_numeric FORMAT JSONEachRow; SELECT `t`.`1`.`2`, `t`.`1`.`3`, `t`.`4` FROM t_tuple_numeric; SELECT t.1.1, t.1.2, t.2 FROM t_tuple_numeric; -SELECT t.1.3 FROM t_tuple_numeric; -- {serverError NOT_FOUND_COLUMN_IN_BLOCK} -SELECT t.4 FROM t_tuple_numeric; -- {serverError NOT_FOUND_COLUMN_IN_BLOCK} -SELECT `t`.`1`.`1`, `t`.`1`.`2`, `t`.`2` FROM t_tuple_numeric; -- {serverError UNKNOWN_IDENTIFIER} +SELECT t.1.3 FROM t_tuple_numeric; -- {serverError NOT_FOUND_COLUMN_IN_BLOCK, ARGUMENT_OUT_OF_BOUND} +SELECT t.4 FROM t_tuple_numeric; -- {serverError NOT_FOUND_COLUMN_IN_BLOCK, ARGUMENT_OUT_OF_BOUND} +SELECT `t`.`1`.`1`, `t`.`1`.`2`, `t`.`2` FROM t_tuple_numeric; -- {serverError UNKNOWN_IDENTIFIER, ARGUMENT_OUT_OF_BOUND} DROP TABLE t_tuple_numeric; From 93c362a803ff924f4176f6cd8483c08d731ccb59 Mon Sep 17 
00:00:00 2001
From: Yakov Olkhovskiy
Date: Fri, 12 Jan 2024 12:06:38 +0000
Subject: [PATCH 005/140] return and fix test

---
 .../01600_parts_states_metrics_long.reference |  4 ++
 .../01600_parts_states_metrics_long.sh        | 40 +++++++++++++++++++
 2 files changed, 44 insertions(+)
 create mode 100644 tests/queries/0_stateless/01600_parts_states_metrics_long.reference
 create mode 100755 tests/queries/0_stateless/01600_parts_states_metrics_long.sh

diff --git a/tests/queries/0_stateless/01600_parts_states_metrics_long.reference b/tests/queries/0_stateless/01600_parts_states_metrics_long.reference
new file mode 100644
index 00000000000..98fb6a68656
--- /dev/null
+++ b/tests/queries/0_stateless/01600_parts_states_metrics_long.reference
@@ -0,0 +1,4 @@
+1
+1
+1
+1
diff --git a/tests/queries/0_stateless/01600_parts_states_metrics_long.sh b/tests/queries/0_stateless/01600_parts_states_metrics_long.sh
new file mode 100755
index 00000000000..2e47034e528
--- /dev/null
+++ b/tests/queries/0_stateless/01600_parts_states_metrics_long.sh
@@ -0,0 +1,40 @@
+#!/usr/bin/env bash
+
+CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+# shellcheck source=../shell_config.sh
+. "$CURDIR"/../shell_config.sh
+
+# NOTE: database = $CLICKHOUSE_DATABASE is unwanted
+verify_sql="SELECT
+    (SELECT sumIf(value, metric = 'PartsActive'), sumIf(value, metric = 'PartsOutdated') FROM system.metrics)
+    = (SELECT sum(active), sum(NOT active) FROM
+       (SELECT active FROM system.parts UNION ALL SELECT active FROM system.projection_parts UNION ALL SELECT 1 FROM system.dropped_tables_parts))"
+
+# The query is not atomic - it can compare states between system.parts and system.metrics from different points in time.
+# So, there is an inherent race condition, but the check should converge to the expected result eventually.
+# In case of test failure, this code will loop indefinitely and the test will time out. 
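+# (E.g. a merge that finishes between the read of system.metrics and the read
+# of system.parts can make the two sides disagree until the next retry.)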
+verify() +{ + while true + do + result=$( $CLICKHOUSE_CLIENT -m --query="$verify_sql" ) + [ "$result" = "1" ] && break + sleep 0.1 + done + echo 1 +} + +$CLICKHOUSE_CLIENT --database_atomic_wait_for_drop_and_detach_synchronously=1 --query="DROP TABLE IF EXISTS test_table" +$CLICKHOUSE_CLIENT --query="CREATE TABLE test_table(data Date) ENGINE = MergeTree PARTITION BY toYear(data) ORDER BY data;" + +$CLICKHOUSE_CLIENT --query="INSERT INTO test_table VALUES ('1992-01-01')" +verify + +$CLICKHOUSE_CLIENT --query="INSERT INTO test_table VALUES ('1992-01-02')" +verify + +$CLICKHOUSE_CLIENT --query="OPTIMIZE TABLE test_table FINAL" +verify + +$CLICKHOUSE_CLIENT --database_atomic_wait_for_drop_and_detach_synchronously=1 --query="DROP TABLE test_table" +verify From 8a0126204272fdcecd722595a2d7e64496ba7c94 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Mon, 15 Jan 2024 18:17:36 +0000 Subject: [PATCH 006/140] fix tests --- src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp | 9 ++++++--- tests/queries/0_stateless/02116_tuple_element.sql | 8 ++++---- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp b/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp index 82d50f5fdb1..c5d34b5462a 100644 --- a/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp +++ b/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp @@ -64,6 +64,9 @@ public: void enterImpl(const QueryTreeNodePtr & node) { + if (!getSettings().optimize_functions_to_subcolumns) + return; + if (data.has_final) return; @@ -202,6 +205,9 @@ public: void enterImpl(QueryTreeNodePtr & node) const { + if (!getSettings().optimize_functions_to_subcolumns) + return; + auto [function_node, first_argument_column_node, table_node] = getTypedNodesForOptimization(node); if (!function_node || !first_argument_column_node || !table_node) return; @@ -358,9 +364,6 @@ private: void FunctionToSubcolumnsPass::run(QueryTreeNodePtr query_tree_node, ContextPtr context) { - if (!context->getSettingsRef().optimize_functions_to_subcolumns) - return; - FunctionToSubcolumnsVisitorFirstPass first_visitor(context); first_visitor.visit(query_tree_node); auto data = first_visitor.getData(); diff --git a/tests/queries/0_stateless/02116_tuple_element.sql b/tests/queries/0_stateless/02116_tuple_element.sql index 97f6c049705..ece7114e763 100644 --- a/tests/queries/0_stateless/02116_tuple_element.sql +++ b/tests/queries/0_stateless/02116_tuple_element.sql @@ -17,8 +17,8 @@ EXPLAIN SYNTAX SELECT tupleElement(t1, 'a') FROM t_tuple_element; SELECT tupleElement(number, 1) FROM numbers(1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } SELECT tupleElement(t1) FROM t_tuple_element; -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } SELECT tupleElement(t1, 'b') FROM t_tuple_element; -- { serverError NOT_FOUND_COLUMN_IN_BLOCK, UNKNOWN_IDENTIFIER } -SELECT tupleElement(t1, 0) FROM t_tuple_element; -- { serverError ILLEGAL_INDEX, NOT_FOUND_COLUMN_IN_BLOCK } -SELECT tupleElement(t1, 3) FROM t_tuple_element; -- { serverError ILLEGAL_INDEX, NOT_FOUND_COLUMN_IN_BLOCK } +SELECT tupleElement(t1, 0) FROM t_tuple_element; -- { serverError ARGUMENT_OUT_OF_BOUND, NOT_FOUND_COLUMN_IN_BLOCK } +SELECT tupleElement(t1, 3) FROM t_tuple_element; -- { serverError ARGUMENT_OUT_OF_BOUND, NOT_FOUND_COLUMN_IN_BLOCK } SELECT tupleElement(t1, materialize('a')) FROM t_tuple_element; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } SELECT t2.1 FROM t_tuple_element; @@ -29,8 +29,8 @@ EXPLAIN SYNTAX SELECT tupleElement(t2, 1) FROM t_tuple_element; SELECT tupleElement(t2) FROM 
t_tuple_element; -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } SELECT tupleElement(t2, 'a') FROM t_tuple_element; -- { serverError NOT_FOUND_COLUMN_IN_BLOCK, UNKNOWN_IDENTIFIER } -SELECT tupleElement(t2, 0) FROM t_tuple_element; -- { serverError ILLEGAL_INDEX, NOT_FOUND_COLUMN_IN_BLOCK } -SELECT tupleElement(t2, 3) FROM t_tuple_element; -- { serverError ILLEGAL_INDEX, NOT_FOUND_COLUMN_IN_BLOCK } +SELECT tupleElement(t2, 0) FROM t_tuple_element; -- { serverError ARGUMENT_OUT_OF_BOUND, NOT_FOUND_COLUMN_IN_BLOCK } +SELECT tupleElement(t2, 3) FROM t_tuple_element; -- { serverError ARGUMENT_OUT_OF_BOUND, NOT_FOUND_COLUMN_IN_BLOCK } SELECT tupleElement(t2, materialize(1)) FROM t_tuple_element; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } DROP TABLE t_tuple_element; From e6ad9dd387cc004096eee3c2bfbadf6689473203 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Wed, 17 Jan 2024 17:21:15 +0000 Subject: [PATCH 007/140] fix crash with optimize_functions_to_subcolumns --- src/Interpreters/RewriteFunctionToSubcolumnVisitor.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/Interpreters/RewriteFunctionToSubcolumnVisitor.cpp b/src/Interpreters/RewriteFunctionToSubcolumnVisitor.cpp index 2a235ae31e4..eaf27a7ae80 100644 --- a/src/Interpreters/RewriteFunctionToSubcolumnVisitor.cpp +++ b/src/Interpreters/RewriteFunctionToSubcolumnVisitor.cpp @@ -81,6 +81,9 @@ std::optional getColumnFromArgumentsToOptimize( return {}; const auto & column_type = columns.get(name_in_storage).type; + if (column_type->hasDynamicSubcolumns()) + return {}; + return NameAndTypePair{name_in_storage, column_type}; } From 0c9926a7045d6200ad2e486f3ee3532c1cbbca16 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Thu, 18 Jan 2024 15:42:31 +0000 Subject: [PATCH 008/140] fixes for optimize_functions_to_subcolumns --- .../Passes/FunctionToSubcolumnsPass.cpp | 28 +++++++++++++++---- src/Interpreters/TreeOptimizer.cpp | 2 +- src/Storages/HDFS/StorageHDFS.h | 1 + src/Storages/HDFS/StorageHDFSCluster.h | 2 -- src/Storages/IStorage.h | 2 ++ src/Storages/IStorageCluster.h | 4 ++- src/Storages/S3Queue/StorageS3Queue.h | 1 + src/Storages/StorageAzureBlob.h | 2 ++ src/Storages/StorageAzureBlobCluster.h | 2 -- src/Storages/StorageFile.h | 1 + src/Storages/StorageFileCluster.h | 2 -- src/Storages/StorageS3.h | 1 + src/Storages/StorageS3Cluster.h | 2 -- src/Storages/StorageURL.h | 1 + src/Storages/StorageURLCluster.h | 2 -- 15 files changed, 35 insertions(+), 18 deletions(-) diff --git a/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp b/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp index 7b5f3a433ad..9aa785d5918 100644 --- a/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp +++ b/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp @@ -35,17 +35,29 @@ std::tuple getTypedNodesForOptimizati return {}; auto * first_argument_column_node = function_arguments_nodes.front()->as(); - if (!first_argument_column_node) + if (!first_argument_column_node || first_argument_column_node->getColumnName() == "__grouping_set") return {}; auto column_source = first_argument_column_node->getColumnSource(); auto * table_node = column_source->as(); - if (!table_node || !table_node->getStorage()->supportsSubcolumns()) + if (!table_node) + return {}; + + const auto & storage = table_node->getStorage(); + const auto & storage_snapshot = table_node->getStorageSnapshot(); + auto column = first_argument_column_node->getColumn(); + + if (!storage->supportsOptimizationToSubcolumns() || storage->isVirtualColumn(column.name, storage_snapshot->metadata)) + return {}; + + 
auto column_in_table = storage_snapshot->tryGetColumn(GetColumnsOptions::All, column.name); + if (!column_in_table || !column_in_table->type->equals(*column.type)) return {}; return std::make_tuple(function_node, first_argument_column_node, table_node); } +/// First pass collects info about identifiers to determine which identifiers are allowed to optimize. class FunctionToSubcolumnsVisitorFirstPass : public InDepthQueryTreeVisitorWithContext { public: @@ -132,6 +144,9 @@ private: void enterImpl(const ColumnNode & column_node) { + if (column_node.getColumnName() == "__grouping_set") + return; + auto column_source = column_node.getColumnSource(); auto * table_node = column_source->as(); if (!table_node) @@ -191,7 +206,7 @@ private: } }; - +/// Second pass optimizes functions to subcolumns for allowed identifiers. class FunctionToSubcolumnsVisitorSecondPass : public InDepthQueryTreeVisitorWithContext { public: @@ -222,9 +237,6 @@ public: if (!identifiers_to_optimize.contains(qualified_name)) return; - if (first_argument_column_node->getColumnName() == "__grouping_set") - return; - auto column_source = first_argument_column_node->getColumnSource(); WhichDataType column_type(column.type); @@ -236,6 +248,8 @@ public: { /// Replace `length(array_argument)` with `array_argument.size0` column.name += ".size0"; + column.type = std::make_shared(); + node = std::make_shared(column, column_source); } else if (function_name == "empty") @@ -269,6 +283,8 @@ public: { /// Replace `isNull(nullable_argument)` with `nullable_argument.null` column.name += ".null"; + column.type = std::make_shared(); + node = std::make_shared(column, column_source); } else if (function_name == "isNotNull") diff --git a/src/Interpreters/TreeOptimizer.cpp b/src/Interpreters/TreeOptimizer.cpp index 07cfe897010..8fab032aece 100644 --- a/src/Interpreters/TreeOptimizer.cpp +++ b/src/Interpreters/TreeOptimizer.cpp @@ -655,7 +655,7 @@ void transformIfStringsIntoEnum(ASTPtr & query) void optimizeFunctionsToSubcolumns(ASTPtr & query, const TreeRewriterResult & result) { - if (!result.storage || !result.storage->supportsSubcolumns() || !result.storage_snapshot) + if (!result.storage || !result.storage->supportsOptimizationToSubcolumns() || !result.storage_snapshot) return; const auto & metadata_snapshot = result.storage_snapshot->metadata; diff --git a/src/Storages/HDFS/StorageHDFS.h b/src/Storages/HDFS/StorageHDFS.h index f1f0019d3e0..bd36556c017 100644 --- a/src/Storages/HDFS/StorageHDFS.h +++ b/src/Storages/HDFS/StorageHDFS.h @@ -81,6 +81,7 @@ public: bool supportsSubsetOfColumns(const ContextPtr & context_) const; bool supportsSubcolumns() const override { return true; } + bool supportsOptimizationToSubcolumns() const override { return false; } static ColumnsDescription getTableStructureFromData( const String & format, diff --git a/src/Storages/HDFS/StorageHDFSCluster.h b/src/Storages/HDFS/StorageHDFSCluster.h index 7c4c41a573a..f35a912129c 100644 --- a/src/Storages/HDFS/StorageHDFSCluster.h +++ b/src/Storages/HDFS/StorageHDFSCluster.h @@ -37,8 +37,6 @@ public: RemoteQueryExecutor::Extension getTaskIteratorExtension(const ActionsDAG::Node * predicate, const ContextPtr & context) const override; - bool supportsSubcolumns() const override { return true; } - bool supportsTrivialCountOptimization() const override { return true; } private: diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index 4fa6bfdd617..62faedd19ba 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -167,6 +167,8 @@ public: /// Returns 
true if the storage supports reading of subcolumns of complex types. virtual bool supportsSubcolumns() const { return false; } + /// Returns true if storage supports optimizations of functions by reading subcolumns. + virtual bool supportsOptimizationToSubcolumns() const { return supportsSubcolumns(); } /// Returns true if the storage supports transactions for SELECT, INSERT and ALTER queries. /// Storage may throw an exception later if some query kind is not fully supported. diff --git a/src/Storages/IStorageCluster.h b/src/Storages/IStorageCluster.h index b233f20103d..0e466976852 100644 --- a/src/Storages/IStorageCluster.h +++ b/src/Storages/IStorageCluster.h @@ -38,7 +38,9 @@ public: QueryProcessingStage::Enum getQueryProcessingStage(ContextPtr, QueryProcessingStage::Enum, const StorageSnapshotPtr &, SelectQueryInfo &) const override; - bool isRemote() const override { return true; } + bool isRemote() const override final { return true; } + bool supportsSubcolumns() const override { return true; } + bool supportsOptimizationToSubcolumns() const override { return false; } protected: virtual void updateBeforeRead(const ContextPtr &) {} diff --git a/src/Storages/S3Queue/StorageS3Queue.h b/src/Storages/S3Queue/StorageS3Queue.h index 3d3594dc2ab..0b50913546e 100644 --- a/src/Storages/S3Queue/StorageS3Queue.h +++ b/src/Storages/S3Queue/StorageS3Queue.h @@ -86,6 +86,7 @@ private: void drop() override; bool supportsSubsetOfColumns(const ContextPtr & context_) const; bool supportsSubcolumns() const override { return true; } + bool supportsOptimizationToSubcolumns() const override { return false; } std::shared_ptr createFileIterator(ContextPtr local_context, const ActionsDAG::Node * predicate); std::shared_ptr createSource( diff --git a/src/Storages/StorageAzureBlob.h b/src/Storages/StorageAzureBlob.h index 16e5b9edfb6..4d54f1cdcc3 100644 --- a/src/Storages/StorageAzureBlob.h +++ b/src/Storages/StorageAzureBlob.h @@ -101,6 +101,8 @@ public: bool supportsSubcolumns() const override { return true; } + bool supportsOptimizationToSubcolumns() const override { return false; } + bool supportsSubsetOfColumns(const ContextPtr & context) const; bool supportsTrivialCountOptimization() const override { return true; } diff --git a/src/Storages/StorageAzureBlobCluster.h b/src/Storages/StorageAzureBlobCluster.h index 2831b94f825..c95e329803c 100644 --- a/src/Storages/StorageAzureBlobCluster.h +++ b/src/Storages/StorageAzureBlobCluster.h @@ -36,8 +36,6 @@ public: RemoteQueryExecutor::Extension getTaskIteratorExtension(const ActionsDAG::Node * predicate, const ContextPtr & context) const override; - bool supportsSubcolumns() const override { return true; } - bool supportsTrivialCountOptimization() const override { return true; } private: diff --git a/src/Storages/StorageFile.h b/src/Storages/StorageFile.h index b74868597a6..db7d8be15cf 100644 --- a/src/Storages/StorageFile.h +++ b/src/Storages/StorageFile.h @@ -93,6 +93,7 @@ public: bool supportsSubsetOfColumns(const ContextPtr & context) const; bool supportsSubcolumns() const override { return true; } + bool supportsOptimizationToSubcolumns() const override { return false; } bool prefersLargeBlocks() const override; diff --git a/src/Storages/StorageFileCluster.h b/src/Storages/StorageFileCluster.h index a6e57c3bb4f..cb00e8870e8 100644 --- a/src/Storages/StorageFileCluster.h +++ b/src/Storages/StorageFileCluster.h @@ -33,8 +33,6 @@ public: RemoteQueryExecutor::Extension getTaskIteratorExtension(const ActionsDAG::Node * predicate, const ContextPtr & context) 
const override; - bool supportsSubcolumns() const override { return true; } - bool supportsTrivialCountOptimization() const override { return true; } private: diff --git a/src/Storages/StorageS3.h b/src/Storages/StorageS3.h index b90a0d394cb..a027f96aa0a 100644 --- a/src/Storages/StorageS3.h +++ b/src/Storages/StorageS3.h @@ -386,6 +386,7 @@ private: ContextPtr ctx); bool supportsSubcolumns() const override { return true; } + bool supportsOptimizationToSubcolumns() const override { return false; } bool supportsSubsetOfColumns(const ContextPtr & context) const; diff --git a/src/Storages/StorageS3Cluster.h b/src/Storages/StorageS3Cluster.h index c526f14834a..81169f79746 100644 --- a/src/Storages/StorageS3Cluster.h +++ b/src/Storages/StorageS3Cluster.h @@ -36,8 +36,6 @@ public: RemoteQueryExecutor::Extension getTaskIteratorExtension(const ActionsDAG::Node * predicate, const ContextPtr & context) const override; - bool supportsSubcolumns() const override { return true; } - bool supportsTrivialCountOptimization() const override { return true; } protected: diff --git a/src/Storages/StorageURL.h b/src/Storages/StorageURL.h index 07d4d0cad38..f16f2757611 100644 --- a/src/Storages/StorageURL.h +++ b/src/Storages/StorageURL.h @@ -281,6 +281,7 @@ public: } bool supportsSubcolumns() const override { return true; } + bool supportsOptimizationToSubcolumns() const override { return false; } static FormatSettings getFormatSettingsFromArgs(const StorageFactory::Arguments & args); diff --git a/src/Storages/StorageURLCluster.h b/src/Storages/StorageURLCluster.h index 07978040029..a555df3cd43 100644 --- a/src/Storages/StorageURLCluster.h +++ b/src/Storages/StorageURLCluster.h @@ -36,8 +36,6 @@ public: RemoteQueryExecutor::Extension getTaskIteratorExtension(const ActionsDAG::Node * predicate, const ContextPtr & context) const override; - bool supportsSubcolumns() const override { return true; } - bool supportsTrivialCountOptimization() const override { return true; } private: From a89956bb0f614d81d3db7998ea026a0a01db8cd4 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Mon, 22 Jan 2024 19:33:34 +0000 Subject: [PATCH 009/140] more cases for optimize_functions_to_subcolumns --- ...egateFunctionsArithmericOperationsPass.cpp | 27 +++----- .../Passes/ComparisonTupleEliminationPass.cpp | 13 ++-- src/Analyzer/Passes/CountDistinctPass.cpp | 7 +-- .../Passes/FunctionToSubcolumnsPass.cpp | 58 +++++++++++------ .../Passes/NormalizeCountVariantsPass.cpp | 13 +--- ...ateOrDateTimeConverterWithPreimagePass.cpp | 25 +++----- .../RewriteAggregateFunctionWithIfPass.cpp | 25 +++----- .../RewriteSumFunctionWithSumAndCountPass.cpp | 31 ++-------- src/Analyzer/Passes/SumIfToCountIfPass.cpp | 16 ++--- .../UniqInjectiveFunctionsEliminationPass.cpp | 11 +--- src/Analyzer/Passes/UniqToCountPass.cpp | 6 +- src/Analyzer/Utils.cpp | 22 +++++++ src/Analyzer/Utils.h | 8 +++ src/Interpreters/InterpreterExplainQuery.cpp | 26 +++++--- .../RewriteFunctionToSubcolumnVisitor.cpp | 62 +++++++------------ .../01872_functions_to_subcolumns.reference | 18 +++--- .../01872_functions_to_subcolumns.sql | 1 - ...functions_to_subcolumns_analyzer.reference | 50 +++++++++++++++ ...01872_functions_to_subcolumns_analyzer.sql | 42 +++++++++++++ .../0_stateless/02115_map_contains.reference | 2 +- .../02115_map_contains_analyzer.reference | 4 ++ .../02115_map_contains_analyzer.sql | 13 ++++ .../0_stateless/02116_tuple_element.reference | 10 +-- .../02116_tuple_element_analyzer.reference | 25 ++++++++ .../02116_tuple_element_analyzer.sql | 43 +++++++++++++ 
...tions_to_subcolumns_column_names.reference | 14 +++++ ...1_functions_to_subcolumns_column_names.sql | 19 ++++++ ...2971_functions_to_subcolumns_map.reference | 8 +++ .../02971_functions_to_subcolumns_map.sql | 19 ++++++ 29 files changed, 411 insertions(+), 207 deletions(-) create mode 100644 tests/queries/0_stateless/01872_functions_to_subcolumns_analyzer.reference create mode 100644 tests/queries/0_stateless/01872_functions_to_subcolumns_analyzer.sql create mode 100644 tests/queries/0_stateless/02115_map_contains_analyzer.reference create mode 100644 tests/queries/0_stateless/02115_map_contains_analyzer.sql create mode 100644 tests/queries/0_stateless/02116_tuple_element_analyzer.reference create mode 100644 tests/queries/0_stateless/02116_tuple_element_analyzer.sql create mode 100644 tests/queries/0_stateless/02971_functions_to_subcolumns_column_names.reference create mode 100644 tests/queries/0_stateless/02971_functions_to_subcolumns_column_names.sql create mode 100644 tests/queries/0_stateless/02971_functions_to_subcolumns_map.reference create mode 100644 tests/queries/0_stateless/02971_functions_to_subcolumns_map.sql diff --git a/src/Analyzer/Passes/AggregateFunctionsArithmericOperationsPass.cpp b/src/Analyzer/Passes/AggregateFunctionsArithmericOperationsPass.cpp index baecb372c2d..b8a477b8523 100644 --- a/src/Analyzer/Passes/AggregateFunctionsArithmericOperationsPass.cpp +++ b/src/Analyzer/Passes/AggregateFunctionsArithmericOperationsPass.cpp @@ -9,6 +9,7 @@ #include #include #include +#include namespace DB { @@ -165,31 +166,17 @@ private: auto aggregate_function_clone = aggregate_function->clone(); auto & aggregate_function_clone_typed = aggregate_function_clone->as(); aggregate_function_clone_typed.getArguments().getNodes() = { arithmetic_function_clone_argument }; - resolveAggregateFunctionNode(aggregate_function_clone_typed, arithmetic_function_clone_argument, result_aggregate_function_name); + + resolveAggregateFunctionNodeByName( + aggregate_function_clone_typed, + result_aggregate_function_name, + {arithmetic_function_clone_argument->getResultType()}); arithmetic_function_clone_arguments_nodes[arithmetic_function_argument_index] = std::move(aggregate_function_clone); - resolveOrdinaryFunctionNode(arithmetic_function_clone_typed, arithmetic_function_clone_typed.getFunctionName()); + resolveOrdinaryFunctionNodeByName(arithmetic_function_clone_typed, arithmetic_function_clone_typed.getFunctionName(), getContext()); return arithmetic_function_clone; } - - inline void resolveOrdinaryFunctionNode(FunctionNode & function_node, const String & function_name) const - { - auto function = FunctionFactory::instance().get(function_name, getContext()); - function_node.resolveAsFunction(function->build(function_node.getArgumentColumns())); - } - - static inline void resolveAggregateFunctionNode(FunctionNode & function_node, const QueryTreeNodePtr & argument, const String & aggregate_function_name) - { - auto function_aggregate_function = function_node.getAggregateFunction(); - - AggregateFunctionProperties properties; - auto action = NullsAction::EMPTY; - auto aggregate_function = AggregateFunctionFactory::instance().get( - aggregate_function_name, action, {argument->getResultType()}, function_aggregate_function->getParameters(), properties); - - function_node.resolveAsAggregateFunction(std::move(aggregate_function)); - } }; } diff --git a/src/Analyzer/Passes/ComparisonTupleEliminationPass.cpp b/src/Analyzer/Passes/ComparisonTupleEliminationPass.cpp index 7c38ba81c70..42b53f667b4 100644 
--- a/src/Analyzer/Passes/ComparisonTupleEliminationPass.cpp +++ b/src/Analyzer/Passes/ComparisonTupleEliminationPass.cpp @@ -11,6 +11,7 @@ #include #include #include +#include namespace DB { @@ -171,13 +172,13 @@ private: { auto result_function = std::make_shared("and"); result_function->getArguments().getNodes() = std::move(tuple_arguments_equals_functions); - resolveOrdinaryFunctionNode(*result_function, result_function->getFunctionName()); + resolveOrdinaryFunctionNodeByName(*result_function, result_function->getFunctionName(), context); if (comparison_function_name == "notEquals") { auto not_function = std::make_shared("not"); not_function->getArguments().getNodes().push_back(std::move(result_function)); - resolveOrdinaryFunctionNode(*not_function, not_function->getFunctionName()); + resolveOrdinaryFunctionNodeByName(*not_function, not_function->getFunctionName(), context); result_function = std::move(not_function); } @@ -197,17 +198,11 @@ private: comparison_function->getArguments().getNodes().push_back(std::move(lhs_argument)); comparison_function->getArguments().getNodes().push_back(std::move(rhs_argument)); - resolveOrdinaryFunctionNode(*comparison_function, comparison_function->getFunctionName()); + resolveOrdinaryFunctionNodeByName(*comparison_function, comparison_function->getFunctionName(), context); return comparison_function; } - void resolveOrdinaryFunctionNode(FunctionNode & function_node, const String & function_name) const - { - auto function = FunctionFactory::instance().get(function_name, context); - function_node.resolveAsFunction(function->build(function_node.getArgumentColumns())); - } - ContextPtr context; }; diff --git a/src/Analyzer/Passes/CountDistinctPass.cpp b/src/Analyzer/Passes/CountDistinctPass.cpp index 07a031fe4e8..a73ca4befcf 100644 --- a/src/Analyzer/Passes/CountDistinctPass.cpp +++ b/src/Analyzer/Passes/CountDistinctPass.cpp @@ -9,6 +9,7 @@ #include #include #include +#include namespace DB { @@ -77,11 +78,9 @@ public: /// Replace `countDistinct` of initial query into `count` auto result_type = function_node->getResultType(); - AggregateFunctionProperties properties; - auto action = NullsAction::EMPTY; - auto aggregate_function = AggregateFunctionFactory::instance().get("count", action, {}, {}, properties); - function_node->resolveAsAggregateFunction(std::move(aggregate_function)); + function_node->getArguments().getNodes().clear(); + resolveAggregateFunctionNodeByName(*function_node, "count", {}); } }; diff --git a/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp b/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp index 9aa785d5918..ac13a505a52 100644 --- a/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp +++ b/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp @@ -17,6 +17,7 @@ #include #include #include +#include namespace DB { @@ -178,12 +179,12 @@ private: } else if (column_type.isNullable()) { - if (function_name == "isNull" || function_name == "isNotNull") + if (function_name == "count" || function_name == "isNull" || function_name == "isNotNull") ++data.optimized_identifiers_count[qualified_name]; } else if (column_type.isMap()) { - if (function_name == "mapKeys" || function_name == "mapValues") + if (function_name == "length" || function_name == "mapKeys" || function_name == "mapValues") ++data.optimized_identifiers_count[qualified_name]; } } @@ -192,10 +193,10 @@ private: const auto * second_argument_constant_node = function_arguments_nodes[1]->as(); if (function_name == "tupleElement" && column_type.isTuple() && second_argument_constant_node) { 
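                /// Illustrative note, not part of the original hunk: for a column `t Tuple(a UInt32, s String)`,
                /// `t.1`, `tupleElement(t, 1)` and `tupleElement(t, 'a')` can all be served by reading only the
                /// subcolumn `t.a`, which is why both String and UInt64 constants are counted as optimizable below.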
- const auto & tuple_element_constant_value = second_argument_constant_node->getValue(); - const auto & tuple_element_constant_value_type = tuple_element_constant_value.getType(); + const auto & constant_value = second_argument_constant_node->getValue(); + const auto & constant_value_type = constant_value.getType(); - if (tuple_element_constant_value_type == Field::Types::String || tuple_element_constant_value_type == Field::Types::UInt64) + if (constant_value_type == Field::Types::String || constant_value_type == Field::Types::UInt64) ++data.optimized_identifiers_count[qualified_name]; } else if (function_name == "mapContains" && column_type.isMap()) @@ -209,6 +210,9 @@ private: /// Second pass optimizes functions to subcolumns for allowed identifiers. class FunctionToSubcolumnsVisitorSecondPass : public InDepthQueryTreeVisitorWithContext { +private: + std::unordered_set identifiers_to_optimize; + public: using Base = InDepthQueryTreeVisitorWithContext; using Base::Base; @@ -262,7 +266,7 @@ public: function_arguments_nodes.push_back(std::make_shared(column, column_source)); function_arguments_nodes.push_back(std::make_shared(static_cast(0))); - resolveOrdinaryFunctionNode(*function_node, "equals"); + resolveOrdinaryFunctionNodeByName(*function_node, "equals", getContext()); } else if (function_name == "notEmpty") { @@ -274,12 +278,27 @@ public: function_arguments_nodes.push_back(std::make_shared(column, column_source)); function_arguments_nodes.push_back(std::make_shared(static_cast(0))); - resolveOrdinaryFunctionNode(*function_node, "notEquals"); + resolveOrdinaryFunctionNodeByName(*function_node, "notEquals", getContext()); } } else if (column_type.isNullable()) { - if (function_name == "isNull") + if (function_name == "count") + { + /// Replace `count(nullable_argument)` with `sum(not(nullable_argument.null))` + column.name += ".null"; + column.type = std::make_shared(); + + auto column_node = std::make_shared(column, column_source); + auto function_node_not = std::make_shared("not"); + + function_node_not->getArguments().getNodes().push_back(std::move(column_node)); + resolveOrdinaryFunctionNodeByName(*function_node_not, "not", getContext()); + + function_arguments_nodes = {std::move(function_node_not)}; + resolveAggregateFunctionNodeByName(*function_node, "sum", {column.type}); + } + else if (function_name == "isNull") { /// Replace `isNull(nullable_argument)` with `nullable_argument.null` column.name += ".null"; @@ -295,12 +314,20 @@ public: function_arguments_nodes = {std::make_shared(column, column_source)}; - resolveOrdinaryFunctionNode(*function_node, "not"); + resolveOrdinaryFunctionNodeByName(*function_node, "not", getContext()); } } else if (column_type.isMap()) { - if (function_name == "mapKeys") + if (function_name == "length") + { + /// Replace `length(map_argument)` with `map_argument.size0` + column.name += ".size0"; + column.type = std::make_shared(); + + node = std::make_shared(column, column_source); + } + else if (function_name == "mapKeys") { /// Replace `mapKeys(map_argument)` with `map_argument.keys` column.name += ".keys"; @@ -364,19 +391,10 @@ public: auto has_function_argument = std::make_shared(column, column_source); function_arguments_nodes[0] = std::move(has_function_argument); - resolveOrdinaryFunctionNode(*function_node, "has"); + resolveOrdinaryFunctionNodeByName(*function_node, "has", getContext()); } } } - -private: - std::unordered_set identifiers_to_optimize; - - inline void resolveOrdinaryFunctionNode(FunctionNode & function_node, const String & 
function_name) const - { - auto function = FunctionFactory::instance().get(function_name, getContext()); - function_node.resolveAsFunction(function->build(function_node.getArgumentColumns())); - } }; } diff --git a/src/Analyzer/Passes/NormalizeCountVariantsPass.cpp b/src/Analyzer/Passes/NormalizeCountVariantsPass.cpp index 6b801925a6e..6d9e6765608 100644 --- a/src/Analyzer/Passes/NormalizeCountVariantsPass.cpp +++ b/src/Analyzer/Passes/NormalizeCountVariantsPass.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include namespace DB @@ -41,25 +42,17 @@ public: if (function_node->getFunctionName() == "count" && !first_argument_constant_literal.isNull()) { - resolveAsCountAggregateFunction(*function_node); function_node->getArguments().getNodes().clear(); + resolveAggregateFunctionNodeByName(*function_node, "count", {}); } else if (function_node->getFunctionName() == "sum" && first_argument_constant_literal.getType() == Field::Types::UInt64 && first_argument_constant_literal.get() == 1) { - resolveAsCountAggregateFunction(*function_node); function_node->getArguments().getNodes().clear(); + resolveAggregateFunctionNodeByName(*function_node, "count", {}); } } -private: - static inline void resolveAsCountAggregateFunction(FunctionNode & function_node) - { - AggregateFunctionProperties properties; - auto aggregate_function = AggregateFunctionFactory::instance().get("count", NullsAction::EMPTY, {}, {}, properties); - - function_node.resolveAsAggregateFunction(std::move(aggregate_function)); - } }; } diff --git a/src/Analyzer/Passes/OptimizeDateOrDateTimeConverterWithPreimagePass.cpp b/src/Analyzer/Passes/OptimizeDateOrDateTimeConverterWithPreimagePass.cpp index 9b9ceacdd4c..cc6fe95101d 100644 --- a/src/Analyzer/Passes/OptimizeDateOrDateTimeConverterWithPreimagePass.cpp +++ b/src/Analyzer/Passes/OptimizeDateOrDateTimeConverterWithPreimagePass.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include @@ -140,16 +141,16 @@ private: const auto lhs = std::make_shared("greaterOrEquals"); lhs->getArguments().getNodes().push_back(std::make_shared(column_node.getColumn(), column_node.getColumnSource())); lhs->getArguments().getNodes().push_back(std::make_shared(start_date_or_date_time)); - resolveOrdinaryFunctionNode(*lhs, lhs->getFunctionName()); + resolveOrdinaryFunctionNodeByName(*lhs, lhs->getFunctionName(), getContext()); const auto rhs = std::make_shared("less"); rhs->getArguments().getNodes().push_back(std::make_shared(column_node.getColumn(), column_node.getColumnSource())); rhs->getArguments().getNodes().push_back(std::make_shared(end_date_or_date_time)); - resolveOrdinaryFunctionNode(*rhs, rhs->getFunctionName()); + resolveOrdinaryFunctionNodeByName(*rhs, rhs->getFunctionName(), getContext()); const auto new_date_filter = std::make_shared("and"); new_date_filter->getArguments().getNodes() = {lhs, rhs}; - resolveOrdinaryFunctionNode(*new_date_filter, new_date_filter->getFunctionName()); + resolveOrdinaryFunctionNodeByName(*new_date_filter, new_date_filter->getFunctionName(), getContext()); return new_date_filter; } @@ -158,16 +159,16 @@ private: const auto lhs = std::make_shared("less"); lhs->getArguments().getNodes().push_back(std::make_shared(column_node.getColumn(), column_node.getColumnSource())); lhs->getArguments().getNodes().push_back(std::make_shared(start_date_or_date_time)); - resolveOrdinaryFunctionNode(*lhs, lhs->getFunctionName()); + resolveOrdinaryFunctionNodeByName(*lhs, lhs->getFunctionName(), getContext()); const auto rhs = 
std::make_shared("greaterOrEquals"); rhs->getArguments().getNodes().push_back(std::make_shared(column_node.getColumn(), column_node.getColumnSource())); rhs->getArguments().getNodes().push_back(std::make_shared(end_date_or_date_time)); - resolveOrdinaryFunctionNode(*rhs, rhs->getFunctionName()); + resolveOrdinaryFunctionNodeByName(*rhs, rhs->getFunctionName(), getContext()); const auto new_date_filter = std::make_shared("or"); new_date_filter->getArguments().getNodes() = {lhs, rhs}; - resolveOrdinaryFunctionNode(*new_date_filter, new_date_filter->getFunctionName()); + resolveOrdinaryFunctionNodeByName(*new_date_filter, new_date_filter->getFunctionName(), getContext()); return new_date_filter; } @@ -176,7 +177,7 @@ private: const auto new_date_filter = std::make_shared("greaterOrEquals"); new_date_filter->getArguments().getNodes().push_back(std::make_shared(column_node.getColumn(), column_node.getColumnSource())); new_date_filter->getArguments().getNodes().push_back(std::make_shared(end_date_or_date_time)); - resolveOrdinaryFunctionNode(*new_date_filter, new_date_filter->getFunctionName()); + resolveOrdinaryFunctionNodeByName(*new_date_filter, new_date_filter->getFunctionName(), getContext()); return new_date_filter; } @@ -185,7 +186,7 @@ private: const auto new_date_filter = std::make_shared("less"); new_date_filter->getArguments().getNodes().push_back(std::make_shared(column_node.getColumn(), column_node.getColumnSource())); new_date_filter->getArguments().getNodes().push_back(std::make_shared(end_date_or_date_time)); - resolveOrdinaryFunctionNode(*new_date_filter, new_date_filter->getFunctionName()); + resolveOrdinaryFunctionNodeByName(*new_date_filter, new_date_filter->getFunctionName(), getContext()); return new_date_filter; } @@ -194,7 +195,7 @@ private: const auto new_date_filter = std::make_shared(comparator); new_date_filter->getArguments().getNodes().push_back(std::make_shared(column_node.getColumn(), column_node.getColumnSource())); new_date_filter->getArguments().getNodes().push_back(std::make_shared(start_date_or_date_time)); - resolveOrdinaryFunctionNode(*new_date_filter, new_date_filter->getFunctionName()); + resolveOrdinaryFunctionNodeByName(*new_date_filter, new_date_filter->getFunctionName(), getContext()); return new_date_filter; } @@ -205,12 +206,6 @@ private: comparator); } } - - void resolveOrdinaryFunctionNode(FunctionNode & function_node, const String & function_name) const - { - auto function = FunctionFactory::instance().get(function_name, getContext()); - function_node.resolveAsFunction(function->build(function_node.getArgumentColumns())); - } }; } diff --git a/src/Analyzer/Passes/RewriteAggregateFunctionWithIfPass.cpp b/src/Analyzer/Passes/RewriteAggregateFunctionWithIfPass.cpp index 9c89670f3c6..b8962e5a4c1 100644 --- a/src/Analyzer/Passes/RewriteAggregateFunctionWithIfPass.cpp +++ b/src/Analyzer/Passes/RewriteAggregateFunctionWithIfPass.cpp @@ -13,6 +13,7 @@ #include #include #include +#include namespace DB { @@ -58,8 +59,7 @@ public: function_arguments_nodes.resize(2); function_arguments_nodes[0] = std::move(if_arguments_nodes[1]); function_arguments_nodes[1] = std::move(if_arguments_nodes[0]); - resolveAsAggregateFunctionWithIf( - *function_node, {function_arguments_nodes[0]->getResultType(), function_arguments_nodes[1]->getResultType()}); + resolveAsAggregateFunctionWithIf(*function_node, function_arguments_nodes); } } else if (first_const_node) @@ -79,30 +79,21 @@ public: function_arguments_nodes.resize(2); function_arguments_nodes[0] = 
std::move(if_arguments_nodes[2]); function_arguments_nodes[1] = std::move(not_function); - resolveAsAggregateFunctionWithIf( - *function_node, {function_arguments_nodes[0]->getResultType(), function_arguments_nodes[1]->getResultType()}); + resolveAsAggregateFunctionWithIf(*function_node, function_arguments_nodes); } } } private: - static inline void resolveAsAggregateFunctionWithIf(FunctionNode & function_node, const DataTypes & argument_types) + static inline void resolveAsAggregateFunctionWithIf(FunctionNode & function_node, const QueryTreeNodes & arguments) { auto result_type = function_node.getResultType(); + auto suffix = result_type->isNullable() ? "OrNullIf" : "If"; - std::string suffix = "If"; - if (result_type->isNullable()) - suffix = "OrNullIf"; - - AggregateFunctionProperties properties; - auto aggregate_function = AggregateFunctionFactory::instance().get( + resolveAggregateFunctionNodeByName( + function_node, function_node.getFunctionName() + suffix, - function_node.getNullsAction(), - argument_types, - function_node.getAggregateFunction()->getParameters(), - properties); - - function_node.resolveAsAggregateFunction(std::move(aggregate_function)); + {arguments[0]->getResultType(), arguments[1]->getResultType()}); } }; diff --git a/src/Analyzer/Passes/RewriteSumFunctionWithSumAndCountPass.cpp b/src/Analyzer/Passes/RewriteSumFunctionWithSumAndCountPass.cpp index 7887a1b7175..2f6674946a3 100644 --- a/src/Analyzer/Passes/RewriteSumFunctionWithSumAndCountPass.cpp +++ b/src/Analyzer/Passes/RewriteSumFunctionWithSumAndCountPass.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include namespace DB @@ -77,50 +78,30 @@ public: const auto lhs = std::make_shared("sum"); lhs->getArguments().getNodes().push_back(func_plus_minus_nodes[column_id]); - resolveAsAggregateFunctionNode(*lhs, column_type); + resolveAggregateFunctionNodeByName(*lhs, lhs->getFunctionName(), {column_type}); const auto rhs_count = std::make_shared("count"); rhs_count->getArguments().getNodes().push_back(func_plus_minus_nodes[column_id]); - resolveAsAggregateFunctionNode(*rhs_count, column_type); + resolveAggregateFunctionNodeByName(*rhs_count, rhs_count->getFunctionName(), {column_type}); const auto rhs = std::make_shared("multiply"); rhs->getArguments().getNodes().push_back(func_plus_minus_nodes[literal_id]); rhs->getArguments().getNodes().push_back(rhs_count); - resolveOrdinaryFunctionNode(*rhs, rhs->getFunctionName()); + resolveOrdinaryFunctionNodeByName(*rhs, rhs->getFunctionName(), getContext()); const auto new_node = std::make_shared(Poco::toLower(func_plus_minus_node->getFunctionName())); if (column_id == 0) new_node->getArguments().getNodes() = {lhs, rhs}; else if (column_id == 1) new_node->getArguments().getNodes() = {rhs, lhs}; - resolveOrdinaryFunctionNode(*new_node, new_node->getFunctionName()); + + resolveOrdinaryFunctionNodeByName(*new_node, new_node->getFunctionName(), getContext()); if (!new_node) return; node = new_node; - } - -private: - void resolveOrdinaryFunctionNode(FunctionNode & function_node, const String & function_name) const - { - const auto function = FunctionFactory::instance().get(function_name, getContext()); - function_node.resolveAsFunction(function->build(function_node.getArgumentColumns())); - } - - static inline void resolveAsAggregateFunctionNode(FunctionNode & function_node, const DataTypePtr & argument_type) - { - AggregateFunctionProperties properties; - const auto aggregate_function = AggregateFunctionFactory::instance().get(function_node.getFunctionName(), - 
NullsAction::EMPTY, - {argument_type}, - {}, - properties); - - function_node.resolveAsAggregateFunction(aggregate_function); - } - }; } diff --git a/src/Analyzer/Passes/SumIfToCountIfPass.cpp b/src/Analyzer/Passes/SumIfToCountIfPass.cpp index c6b1c6eb851..78d5479843e 100644 --- a/src/Analyzer/Passes/SumIfToCountIfPass.cpp +++ b/src/Analyzer/Passes/SumIfToCountIfPass.cpp @@ -5,6 +5,7 @@ #include #include +#include #include @@ -65,7 +66,8 @@ public: auto multiplier_node = function_node_arguments_nodes[0]; function_node_arguments_nodes[0] = std::move(function_node_arguments_nodes[1]); function_node_arguments_nodes.resize(1); - resolveAsCountIfAggregateFunction(*function_node, function_node_arguments_nodes[0]->getResultType()); + + resolveAggregateFunctionNodeByName(*function_node, "countIf", {function_node_arguments_nodes[0]->getResultType()}); if (constant_value_literal.get() != 1) { @@ -113,7 +115,7 @@ public: function_node_arguments_nodes[0] = nested_if_function_arguments_nodes[0]; function_node_arguments_nodes.resize(1); - resolveAsCountIfAggregateFunction(*function_node, function_node_arguments_nodes[0]->getResultType()); + resolveAggregateFunctionNodeByName(*function_node, "countIf", {function_node_arguments_nodes[0]->getResultType()}); if (if_true_condition_value != 1) { @@ -142,7 +144,7 @@ public: function_node_arguments_nodes[0] = std::move(not_function); function_node_arguments_nodes.resize(1); - resolveAsCountIfAggregateFunction(*function_node, function_node_arguments_nodes[0]->getResultType()); + resolveAggregateFunctionNodeByName(*function_node, "countIf", {function_node_arguments_nodes[0]->getResultType()}); if (if_false_condition_value != 1) { @@ -154,14 +156,6 @@ public: } private: - static inline void resolveAsCountIfAggregateFunction(FunctionNode & function_node, const DataTypePtr & argument_type) - { - AggregateFunctionProperties properties; - auto aggregate_function = AggregateFunctionFactory::instance().get( - "countIf", NullsAction::EMPTY, {argument_type}, function_node.getAggregateFunction()->getParameters(), properties); - - function_node.resolveAsAggregateFunction(std::move(aggregate_function)); - } inline QueryTreeNodePtr getMultiplyFunction(QueryTreeNodePtr left, QueryTreeNodePtr right) { diff --git a/src/Analyzer/Passes/UniqInjectiveFunctionsEliminationPass.cpp b/src/Analyzer/Passes/UniqInjectiveFunctionsEliminationPass.cpp index a8382930506..610128a5754 100644 --- a/src/Analyzer/Passes/UniqInjectiveFunctionsEliminationPass.cpp +++ b/src/Analyzer/Passes/UniqInjectiveFunctionsEliminationPass.cpp @@ -7,6 +7,7 @@ #include #include +#include namespace DB @@ -75,15 +76,7 @@ public: for (const auto & function_node_argument : function_node_argument_nodes) argument_types.emplace_back(function_node_argument->getResultType()); - AggregateFunctionProperties properties; - auto aggregate_function = AggregateFunctionFactory::instance().get( - function_node->getFunctionName(), - NullsAction::EMPTY, - argument_types, - function_node->getAggregateFunction()->getParameters(), - properties); - - function_node->resolveAsAggregateFunction(std::move(aggregate_function)); + resolveAggregateFunctionNodeByName(*function_node, function_node->getFunctionName(), argument_types); } }; diff --git a/src/Analyzer/Passes/UniqToCountPass.cpp b/src/Analyzer/Passes/UniqToCountPass.cpp index 11ebc45a369..d5e4e011cfa 100644 --- a/src/Analyzer/Passes/UniqToCountPass.cpp +++ b/src/Analyzer/Passes/UniqToCountPass.cpp @@ -7,6 +7,7 @@ #include #include #include +#include namespace DB { @@ -175,11 +176,8 @@ 
public: /// Replace uniq of initial query to count if (match_subquery_with_distinct() || match_subquery_with_group_by()) { - AggregateFunctionProperties properties; - auto aggregate_function = AggregateFunctionFactory::instance().get("count", NullsAction::EMPTY, {}, {}, properties); - function_node->getArguments().getNodes().clear(); - function_node->resolveAsAggregateFunction(std::move(aggregate_function)); + resolveAggregateFunctionNodeByName(*function_node, "count", {}); } } }; diff --git a/src/Analyzer/Utils.cpp b/src/Analyzer/Utils.cpp index 53fcf534f64..c193619a35f 100644 --- a/src/Analyzer/Utils.cpp +++ b/src/Analyzer/Utils.cpp @@ -685,4 +685,26 @@ QueryTreeNodePtr createCastFunction(QueryTreeNodePtr node, DataTypePtr result_ty return function_node; } +void resolveOrdinaryFunctionNodeByName(FunctionNode & function_node, const String & function_name, const ContextPtr & context) +{ + auto function = FunctionFactory::instance().get(function_name, context); + function_node.resolveAsFunction(function->build(function_node.getArgumentColumns())); +} + +void resolveAggregateFunctionNodeByName(FunctionNode & function_node, const String & function_name, const DataTypes & argument_types) +{ + chassert(function_node.isAggregateFunction()); + auto old_aggregate_function = function_node.getAggregateFunction(); + + AggregateFunctionProperties properties; + auto aggregate_function = AggregateFunctionFactory::instance().get( + function_name, + function_node.getNullsAction(), + argument_types, + old_aggregate_function->getParameters(), + properties); + + function_node.resolveAsAggregateFunction(std::move(aggregate_function)); +} + } diff --git a/src/Analyzer/Utils.h b/src/Analyzer/Utils.h index d3eb6ba3cc2..60f32d6b267 100644 --- a/src/Analyzer/Utils.h +++ b/src/Analyzer/Utils.h @@ -102,4 +102,12 @@ NameSet collectIdentifiersFullNames(const QueryTreeNodePtr & node); /// Wrap node into `_CAST` function QueryTreeNodePtr createCastFunction(QueryTreeNodePtr node, DataTypePtr result_type, ContextPtr context); +/// Resolves function node as ordinary function with given name. +/// Arguments and parameters are taken from the node. +void resolveOrdinaryFunctionNodeByName(FunctionNode & function_node, const String & function_name, const ContextPtr & context); + +/// Resolves function node as aggregate function with given name. +/// Arguments and parameters are taken from the node. 
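+/// Usage sketch (illustrative only, mirroring UniqToCountPass above, not added by this patch):
+/// to rewrite `uniq(...)` into `count()`, a pass clears the arguments and re-resolves the node:
+///
+///     function_node.getArguments().getNodes().clear();
+///     resolveAggregateFunctionNodeByName(function_node, "count", {});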
+void resolveAggregateFunctionNodeByName(FunctionNode & function_node, const String & function_name, const DataTypes & argument_types); + } diff --git a/src/Interpreters/InterpreterExplainQuery.cpp b/src/Interpreters/InterpreterExplainQuery.cpp index 458be843b59..b99506e948e 100644 --- a/src/Interpreters/InterpreterExplainQuery.cpp +++ b/src/Interpreters/InterpreterExplainQuery.cpp @@ -43,6 +43,7 @@ namespace ErrorCodes extern const int UNKNOWN_SETTING; extern const int LOGICAL_ERROR; extern const int NOT_IMPLEMENTED; + extern const int BAD_ARGUMENTS; } namespace @@ -170,6 +171,7 @@ struct QueryASTSettings struct QueryTreeSettings { bool run_passes = true; + bool dump_tree = true; bool dump_passes = false; bool dump_ast = false; Int64 passes = -1; @@ -179,6 +181,7 @@ struct QueryTreeSettings std::unordered_map> boolean_settings = { {"run_passes", run_passes}, + {"dump_tree", dump_tree}, {"dump_passes", dump_passes}, {"dump_ast", dump_ast} }; @@ -398,7 +401,11 @@ QueryPipeline InterpreterExplainQuery::executeImpl() throw Exception(ErrorCodes::INCORRECT_QUERY, "Only SELECT is supported for EXPLAIN QUERY TREE query"); auto settings = checkAndGetSettings(ast.getSettings()); + if (!settings.dump_tree && !settings.dump_ast) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Either 'dump_tree' or 'dump_ast' must be set for EXPLAIN QUERY TREE query"); + auto query_tree = buildQueryTree(ast.getExplainedQuery(), getContext()); + bool need_newline = false; if (settings.run_passes) { @@ -410,23 +417,26 @@ QueryPipeline InterpreterExplainQuery::executeImpl() if (settings.dump_passes) { query_tree_pass_manager.dump(buf, pass_index); - if (pass_index > 0) - buf << '\n'; + need_newline = true; } query_tree_pass_manager.run(query_tree, pass_index); + } + + if (settings.dump_tree) + { + if (need_newline) + buf << "\n\n"; query_tree->dumpTree(buf); - } - else - { - query_tree->dumpTree(buf); + need_newline = true; } if (settings.dump_ast) { - buf << '\n'; - buf << '\n'; + if (need_newline) + buf << "\n\n"; + query_tree->toAST()->format(IAST::FormatSettings(buf, false)); } diff --git a/src/Interpreters/RewriteFunctionToSubcolumnVisitor.cpp b/src/Interpreters/RewriteFunctionToSubcolumnVisitor.cpp index eaf27a7ae80..3167c2d37dc 100644 --- a/src/Interpreters/RewriteFunctionToSubcolumnVisitor.cpp +++ b/src/Interpreters/RewriteFunctionToSubcolumnVisitor.cpp @@ -40,27 +40,16 @@ ASTPtr transformCountNullableToSubcolumn(const String & name_in_storage, const S return makeASTFunction("sum", makeASTFunction("not", ast)); } -ASTPtr transformMapContainsToSubcolumn(const String & name_in_storage, const String & subcolumn_name, const ASTPtr & arg) +const std::unordered_map, String, decltype(&transformToSubcolumn)>> unary_function_to_subcolumn = { - auto ast = transformToSubcolumn(name_in_storage, subcolumn_name); - return makeASTFunction("has", ast, arg); -} - -const std::unordered_map> unary_function_to_subcolumn = -{ - {"length", {TypeIndex::Array, "size0", transformToSubcolumn}}, - {"empty", {TypeIndex::Array, "size0", transformEmptyToSubcolumn}}, - {"notEmpty", {TypeIndex::Array, "size0", transformNotEmptyToSubcolumn}}, - {"isNull", {TypeIndex::Nullable, "null", transformToSubcolumn}}, - {"isNotNull", {TypeIndex::Nullable, "null", transformIsNotNullToSubcolumn}}, - {"count", {TypeIndex::Nullable, "null", transformCountNullableToSubcolumn}}, - {"mapKeys", {TypeIndex::Map, "keys", transformToSubcolumn}}, - {"mapValues", {TypeIndex::Map, "values", transformToSubcolumn}}, -}; - -const std::unordered_map> 
binary_function_to_subcolumn -{ - {"mapContains", {TypeIndex::Map, "keys", transformMapContainsToSubcolumn}}, + {"length", {{TypeIndex::Array, TypeIndex::Map}, "size0", transformToSubcolumn}}, + {"empty", {{TypeIndex::Array, TypeIndex::Map}, "size0", transformEmptyToSubcolumn}}, + {"notEmpty", {{TypeIndex::Array, TypeIndex::Map}, "size0", transformNotEmptyToSubcolumn}}, + {"isNull", {{TypeIndex::Nullable}, "null", transformToSubcolumn}}, + {"isNotNull", {{TypeIndex::Nullable}, "null", transformIsNotNullToSubcolumn}}, + {"count", {{TypeIndex::Nullable}, "null", transformCountNullableToSubcolumn}}, + {"mapKeys", {{TypeIndex::Map}, "keys", transformToSubcolumn}}, + {"mapValues", {{TypeIndex::Map}, "values", transformToSubcolumn}}, }; std::optional getColumnFromArgumentsToOptimize( @@ -116,10 +105,14 @@ void RewriteFunctionToSubcolumnFirstPassMatcher::visit(const ASTFunction & funct if (arguments.size() == 1) { auto it = unary_function_to_subcolumn.find(function.name); - if (it != unary_function_to_subcolumn.end() && std::get<0>(it->second) == column_type_id) + if (it == unary_function_to_subcolumn.end()) + return; + + const auto & expected_types_id = std::get<0>(it->second); + if (expected_types_id.contains(column_type_id)) ++data.optimized_identifiers_count[column->name]; } - else + else if (arguments.size() == 2) { if (function.name == "tupleElement" && column_type_id == TypeIndex::Tuple) { @@ -131,11 +124,9 @@ void RewriteFunctionToSubcolumnFirstPassMatcher::visit(const ASTFunction & funct if (value_type == Field::Types::UInt64 || value_type == Field::Types::String) ++data.optimized_identifiers_count[column->name]; } - else + else if (function.name == "mapContains" && column_type_id == TypeIndex::Map) { - auto it = binary_function_to_subcolumn.find(function.name); - if (it != binary_function_to_subcolumn.end() && std::get<0>(it->second) == column_type_id) - ++data.optimized_identifiers_count[column->name]; + ++data.optimized_identifiers_count[column->name]; } } } @@ -148,7 +139,7 @@ void RewriteFunctionToSubcolumnSecondPassData::visit(ASTFunction & function, AST return; auto column_type_id = column->type->getTypeId(); - const auto & alias = function.tryGetAlias(); + auto alias = function.getAliasOrColumnName(); if (arguments.size() == 1) { @@ -156,8 +147,8 @@ void RewriteFunctionToSubcolumnSecondPassData::visit(ASTFunction & function, AST if (it == unary_function_to_subcolumn.end()) return; - const auto & [expected_type_id, subcolumn_name, transformer] = it->second; - if (column_type_id != expected_type_id) + const auto & [expected_types_id, subcolumn_name, transformer] = it->second; + if (!expected_types_id.contains(column_type_id)) return; ast = transformer(column->name, subcolumn_name); @@ -191,17 +182,10 @@ void RewriteFunctionToSubcolumnSecondPassData::visit(ASTFunction & function, AST ast = transformToSubcolumn(column->name, subcolumn_name); ast->setAlias(alias); } - else + else if (function.name == "mapContains" && column_type_id == TypeIndex::Map) { - auto it = binary_function_to_subcolumn.find(function.name); - if (it == binary_function_to_subcolumn.end()) - return; - - const auto & [expected_type_id, subcolumn_name, transformer] = it->second; - if (column_type_id != expected_type_id) - return; - - ast = transformer(column->name, subcolumn_name, arguments[1]); + auto subcolumn = transformToSubcolumn(column->name, "keys"); + ast = makeASTFunction("has", subcolumn, arguments[1]); ast->setAlias(alias); } } diff --git a/tests/queries/0_stateless/01872_functions_to_subcolumns.reference 
b/tests/queries/0_stateless/01872_functions_to_subcolumns.reference index a1cd31e2dc9..8c4017d6030 100644 --- a/tests/queries/0_stateless/01872_functions_to_subcolumns.reference +++ b/tests/queries/0_stateless/01872_functions_to_subcolumns.reference @@ -2,25 +2,25 @@ 0 1 0 SELECT id IS NULL, - `n.null`, - NOT `n.null` + `n.null` AS `isNull(n)`, + NOT `n.null` AS `isNotNull(n)` FROM t_func_to_subcolumns 3 0 1 0 0 1 0 \N SELECT - `arr.size0`, - `arr.size0` = 0, - `arr.size0` != 0, + `arr.size0` AS `length(arr)`, + `arr.size0` = 0 AS `empty(arr)`, + `arr.size0` != 0 AS `notEmpty(arr)`, empty(n) FROM t_func_to_subcolumns ['foo','bar'] [1,2] [] [] SELECT - `m.keys`, - `m.values` + `m.keys` AS `mapKeys(m)`, + `m.values` AS `mapValues(m)` FROM t_func_to_subcolumns 1 -SELECT sum(NOT `n.null`) +SELECT sum(NOT `n.null`) AS `count(n)` FROM t_func_to_subcolumns 2 SELECT count(id) @@ -30,7 +30,7 @@ FROM t_func_to_subcolumns 3 0 0 SELECT id, - `n.null`, + `n.null` AS `isNull(n)`, right.n IS NULL FROM t_func_to_subcolumns AS left ALL FULL OUTER JOIN diff --git a/tests/queries/0_stateless/01872_functions_to_subcolumns.sql b/tests/queries/0_stateless/01872_functions_to_subcolumns.sql index eb0165f4e13..45f83bf20e5 100644 --- a/tests/queries/0_stateless/01872_functions_to_subcolumns.sql +++ b/tests/queries/0_stateless/01872_functions_to_subcolumns.sql @@ -1,6 +1,5 @@ DROP TABLE IF EXISTS t_func_to_subcolumns; -SET allow_experimental_map_type = 1; SET optimize_functions_to_subcolumns = 1; CREATE TABLE t_func_to_subcolumns (id UInt64, arr Array(UInt64), n Nullable(String), m Map(String, UInt64)) diff --git a/tests/queries/0_stateless/01872_functions_to_subcolumns_analyzer.reference b/tests/queries/0_stateless/01872_functions_to_subcolumns_analyzer.reference new file mode 100644 index 00000000000..ce5e46fa271 --- /dev/null +++ b/tests/queries/0_stateless/01872_functions_to_subcolumns_analyzer.reference @@ -0,0 +1,50 @@ +0 0 1 +0 1 0 +SELECT + __table1.id IS NULL AS `isNull(id)`, + __table1.`n.null` AS `isNull(n)`, + NOT __table1.`n.null` AS `isNotNull(n)` +FROM default.t_func_to_subcolumns AS __table1 +3 0 1 0 +0 1 0 \N +SELECT + __table1.`arr.size0` AS `length(arr)`, + __table1.`arr.size0` = 0 AS `empty(arr)`, + __table1.`arr.size0` != 0 AS `notEmpty(arr)`, + empty(__table1.n) AS `empty(n)` +FROM default.t_func_to_subcolumns AS __table1 +['foo','bar'] [1,2] +[] [] +SELECT + __table1.`m.keys` AS `mapKeys(m)`, + __table1.`m.values` AS `mapValues(m)` +FROM default.t_func_to_subcolumns AS __table1 +1 +SELECT sum(NOT __table1.`n.null`) AS `count(n)` +FROM default.t_func_to_subcolumns AS __table1 +2 +SELECT count(__table1.id) AS `count(id)` +FROM default.t_func_to_subcolumns AS __table1 +1 0 0 +2 1 0 +3 0 0 +SELECT + __table1.id AS id, + __table1.`n.null` AS `isNull(n)`, + __table2.n IS NULL AS `isNull(right.n)` +FROM default.t_func_to_subcolumns AS __table1 +ALL FULL OUTER JOIN +( + + SELECT + 1 AS id, + \'qqq\' AS n + FROM system.one AS __table4 + UNION ALL + SELECT + 3 AS id, + \'www\' AS `\'www\'` + FROM system.one AS __table6 +) AS __table2 USING (id) +0 10 +0 20 diff --git a/tests/queries/0_stateless/01872_functions_to_subcolumns_analyzer.sql b/tests/queries/0_stateless/01872_functions_to_subcolumns_analyzer.sql new file mode 100644 index 00000000000..c1ab6909e2f --- /dev/null +++ b/tests/queries/0_stateless/01872_functions_to_subcolumns_analyzer.sql @@ -0,0 +1,42 @@ +DROP TABLE IF EXISTS t_func_to_subcolumns; + +SET allow_experimental_analyzer = 1; +SET optimize_functions_to_subcolumns = 1; + +CREATE TABLE 
t_func_to_subcolumns (id UInt64, arr Array(UInt64), n Nullable(String), m Map(String, UInt64)) +ENGINE = MergeTree ORDER BY tuple(); + +INSERT INTO t_func_to_subcolumns VALUES (1, [1, 2, 3], 'abc', map('foo', 1, 'bar', 2)) (2, [], NULL, map()); + +SELECT id IS NULL, n IS NULL, n IS NOT NULL FROM t_func_to_subcolumns; +EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT id IS NULL, n IS NULL, n IS NOT NULL FROM t_func_to_subcolumns; + +SELECT length(arr), empty(arr), notEmpty(arr), empty(n) FROM t_func_to_subcolumns; +EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT length(arr), empty(arr), notEmpty(arr), empty(n) FROM t_func_to_subcolumns; + +SELECT mapKeys(m), mapValues(m) FROM t_func_to_subcolumns; +EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT mapKeys(m), mapValues(m) FROM t_func_to_subcolumns; + +SELECT count(n) FROM t_func_to_subcolumns; +EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT count(n) FROM t_func_to_subcolumns; + +SELECT count(id) FROM t_func_to_subcolumns; +EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT count(id) FROM t_func_to_subcolumns; + +SELECT id, left.n IS NULL, right.n IS NULL FROM t_func_to_subcolumns AS left +FULL JOIN (SELECT 1 AS id, 'qqq' AS n UNION ALL SELECT 3 AS id, 'www') AS right USING(id); + +EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT id, left.n IS NULL, right.n IS NULL FROM t_func_to_subcolumns AS left +FULL JOIN (SELECT 1 AS id, 'qqq' AS n UNION ALL SELECT 3 AS id, 'www') AS right USING(id); + +DROP TABLE t_func_to_subcolumns; + +DROP TABLE IF EXISTS t_tuple_null; + +CREATE TABLE t_tuple_null (t Tuple(null UInt32)) ENGINE = MergeTree ORDER BY tuple(); + +INSERT INTO t_tuple_null VALUES ((10)), ((20)); + +SELECT t IS NULL, t.null FROM t_tuple_null; + +DROP TABLE t_tuple_null; diff --git a/tests/queries/0_stateless/02115_map_contains.reference b/tests/queries/0_stateless/02115_map_contains.reference index 975e9876237..e4ae4f951ba 100644 --- a/tests/queries/0_stateless/02115_map_contains.reference +++ b/tests/queries/0_stateless/02115_map_contains.reference @@ -1,4 +1,4 @@ -SELECT has(`m.keys`, \'a\') +SELECT has(`m.keys`, \'a\') AS `mapContains(m, \'a\')` FROM t_map_contains 1 0 diff --git a/tests/queries/0_stateless/02115_map_contains_analyzer.reference b/tests/queries/0_stateless/02115_map_contains_analyzer.reference new file mode 100644 index 00000000000..7da5243e727 --- /dev/null +++ b/tests/queries/0_stateless/02115_map_contains_analyzer.reference @@ -0,0 +1,4 @@ +SELECT has(__table1.`m.keys`, \'a\') AS `mapContains(m, \'a\')` +FROM default.t_map_contains AS __table1 +1 +0 diff --git a/tests/queries/0_stateless/02115_map_contains_analyzer.sql b/tests/queries/0_stateless/02115_map_contains_analyzer.sql new file mode 100644 index 00000000000..46e02eca4f0 --- /dev/null +++ b/tests/queries/0_stateless/02115_map_contains_analyzer.sql @@ -0,0 +1,13 @@ +DROP TABLE IF EXISTS t_map_contains; + +CREATE TABLE t_map_contains (m Map(String, UInt32)) ENGINE = Memory; + +INSERT INTO t_map_contains VALUES (map('a', 1, 'b', 2)), (map('c', 3, 'd', 4)); + +SET optimize_functions_to_subcolumns = 1; +SET allow_experimental_analyzer = 1; + +EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT mapContains(m, 'a') FROM t_map_contains; +SELECT mapContains(m, 'a') FROM t_map_contains; + +DROP TABLE t_map_contains; diff --git a/tests/queries/0_stateless/02116_tuple_element.reference b/tests/queries/0_stateless/02116_tuple_element.reference index 121b08d02f1..a8004f5e74c 100644 --- a/tests/queries/0_stateless/02116_tuple_element.reference 
+++ b/tests/queries/0_stateless/02116_tuple_element.reference @@ -1,17 +1,17 @@ 1 -SELECT `t1.a` +SELECT `t1.a` AS `tupleElement(t1, 1)` FROM t_tuple_element a -SELECT `t1.s` +SELECT `t1.s` AS `tupleElement(t1, 2)` FROM t_tuple_element 1 -SELECT `t1.a` +SELECT `t1.a` AS `tupleElement(t1, \'a\')` FROM t_tuple_element 2 -SELECT `t2.1` +SELECT `t2.1` AS `tupleElement(t2, 1)` FROM t_tuple_element 2 -SELECT `t2.1` +SELECT `t2.1` AS `tupleElement(t2, 1)` FROM t_tuple_element 1 2 WITH (1, 2) AS t diff --git a/tests/queries/0_stateless/02116_tuple_element_analyzer.reference b/tests/queries/0_stateless/02116_tuple_element_analyzer.reference new file mode 100644 index 00000000000..d30f3a6cc58 --- /dev/null +++ b/tests/queries/0_stateless/02116_tuple_element_analyzer.reference @@ -0,0 +1,25 @@ +1 +SELECT __table1.`t1.a` AS `tupleElement(t1, 1)` +FROM default.t_tuple_element AS __table1 +a +SELECT __table1.`t1.s` AS `tupleElement(t1, 2)` +FROM default.t_tuple_element AS __table1 +1 +SELECT __table1.`t1.a` AS `tupleElement(t1, \'a\')` +FROM default.t_tuple_element AS __table1 +2 +SELECT __table1.`t2.1` AS `tupleElement(t2, 1)` +FROM default.t_tuple_element AS __table1 +2 +SELECT __table1.`t2.1` AS `tupleElement(t2, 1)` +FROM default.t_tuple_element AS __table1 +1 2 +SELECT + 1 AS `tupleElement(t, 1)`, + 2 AS `tupleElement(t, 2)` +FROM system.one AS __table1 +1 2 +SELECT + _CAST(1, \'UInt32\') AS `tupleElement(t, 1)`, + _CAST(2, \'UInt32\') AS `tupleElement(t, \'b\')` +FROM system.one AS __table1 diff --git a/tests/queries/0_stateless/02116_tuple_element_analyzer.sql b/tests/queries/0_stateless/02116_tuple_element_analyzer.sql new file mode 100644 index 00000000000..5aeb72c9ee4 --- /dev/null +++ b/tests/queries/0_stateless/02116_tuple_element_analyzer.sql @@ -0,0 +1,43 @@ +DROP TABLE IF EXISTS t_tuple_element; + +CREATE TABLE t_tuple_element(t1 Tuple(a UInt32, s String), t2 Tuple(UInt32, String)) ENGINE = Memory; +INSERT INTO t_tuple_element VALUES ((1, 'a'), (2, 'b')); + +SET optimize_functions_to_subcolumns = 1; +SET allow_experimental_analyzer = 1; + +SELECT t1.1 FROM t_tuple_element; +EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT t1.1 FROM t_tuple_element; + +SELECT tupleElement(t1, 2) FROM t_tuple_element; +EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT tupleElement(t1, 2) FROM t_tuple_element; + +SELECT tupleElement(t1, 'a') FROM t_tuple_element; +EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT tupleElement(t1, 'a') FROM t_tuple_element; + +SELECT tupleElement(number, 1) FROM numbers(1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT tupleElement(t1) FROM t_tuple_element; -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT tupleElement(t1, 'b') FROM t_tuple_element; -- { serverError NOT_FOUND_COLUMN_IN_BLOCK, UNKNOWN_IDENTIFIER } +SELECT tupleElement(t1, 0) FROM t_tuple_element; -- { serverError ARGUMENT_OUT_OF_BOUND, NOT_FOUND_COLUMN_IN_BLOCK } +SELECT tupleElement(t1, 3) FROM t_tuple_element; -- { serverError ARGUMENT_OUT_OF_BOUND, NOT_FOUND_COLUMN_IN_BLOCK } +SELECT tupleElement(t1, materialize('a')) FROM t_tuple_element; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } + +SELECT t2.1 FROM t_tuple_element; +EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT t2.1 FROM t_tuple_element; + +SELECT tupleElement(t2, 1) FROM t_tuple_element; +EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT tupleElement(t2, 1) FROM t_tuple_element; + +SELECT tupleElement(t2) FROM t_tuple_element; -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT tupleElement(t2, 'a') FROM 
t_tuple_element; -- { serverError NOT_FOUND_COLUMN_IN_BLOCK, UNKNOWN_IDENTIFIER } +SELECT tupleElement(t2, 0) FROM t_tuple_element; -- { serverError ARGUMENT_OUT_OF_BOUND, NOT_FOUND_COLUMN_IN_BLOCK } +SELECT tupleElement(t2, 3) FROM t_tuple_element; -- { serverError ARGUMENT_OUT_OF_BOUND, NOT_FOUND_COLUMN_IN_BLOCK } +SELECT tupleElement(t2, materialize(1)) FROM t_tuple_element; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } + +DROP TABLE t_tuple_element; + +WITH (1, 2) AS t SELECT t.1, t.2; +EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 WITH (1, 2) AS t SELECT t.1, t.2; + +WITH (1, 2)::Tuple(a UInt32, b UInt32) AS t SELECT t.1, tupleElement(t, 'b'); +EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 WITH (1, 2)::Tuple(a UInt32, b UInt32) AS t SELECT t.1, tupleElement(t, 'b'); diff --git a/tests/queries/0_stateless/02971_functions_to_subcolumns_column_names.reference b/tests/queries/0_stateless/02971_functions_to_subcolumns_column_names.reference new file mode 100644 index 00000000000..4787c660c68 --- /dev/null +++ b/tests/queries/0_stateless/02971_functions_to_subcolumns_column_names.reference @@ -0,0 +1,14 @@ +SELECT + `arr.size0` AS `length(arr)`, + `n.null` AS `isNull(n)` +FROM t_column_names +┌─length(arr)─┬─isNull(n)─┐ +│ 3 │ 0 │ +└─────────────┴───────────┘ +SELECT + __table1.`arr.size0` AS `length(arr)`, + __table1.`n.null` AS `isNull(n)` +FROM default.t_column_names AS __table1 +┌─length(arr)─┬─isNull(n)─┐ +│ 3 │ 0 │ +└─────────────┴───────────┘ diff --git a/tests/queries/0_stateless/02971_functions_to_subcolumns_column_names.sql b/tests/queries/0_stateless/02971_functions_to_subcolumns_column_names.sql new file mode 100644 index 00000000000..89c39046df3 --- /dev/null +++ b/tests/queries/0_stateless/02971_functions_to_subcolumns_column_names.sql @@ -0,0 +1,19 @@ +DROP TABLE IF EXISTS t_column_names; + +CREATE TABLE t_column_names (arr Array(UInt64), n Nullable(String)) ENGINE = Memory; + +INSERT INTO t_column_names VALUES ([1, 2, 3], 'foo'); + +SET optimize_functions_to_subcolumns = 1; +SET allow_experimental_analyzer = 0; + +EXPLAIN SYNTAX SELECT length(arr), isNull(n) FROM t_column_names; +SELECT length(arr), isNull(n) FROM t_column_names FORMAT PrettyCompactNoEscapes; + +SET optimize_functions_to_subcolumns = 1; +SET allow_experimental_analyzer = 1; + +EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT length(arr), isNull(n) FROM t_column_names; +SELECT length(arr), isNull(n) FROM t_column_names FORMAT PrettyCompactNoEscapes; + +DROP TABLE t_column_names; diff --git a/tests/queries/0_stateless/02971_functions_to_subcolumns_map.reference b/tests/queries/0_stateless/02971_functions_to_subcolumns_map.reference new file mode 100644 index 00000000000..90596ce1000 --- /dev/null +++ b/tests/queries/0_stateless/02971_functions_to_subcolumns_map.reference @@ -0,0 +1,8 @@ +SELECT `m.size0` AS `length(m)` +FROM t_func_to_subcolumns_map +2 +1 +SELECT __table1.`m.size0` AS `length(m)` +FROM default.t_func_to_subcolumns_map AS __table1 +2 +1 diff --git a/tests/queries/0_stateless/02971_functions_to_subcolumns_map.sql b/tests/queries/0_stateless/02971_functions_to_subcolumns_map.sql new file mode 100644 index 00000000000..b5687696b43 --- /dev/null +++ b/tests/queries/0_stateless/02971_functions_to_subcolumns_map.sql @@ -0,0 +1,19 @@ +DROP TABLE IF EXISTS t_func_to_subcolumns_map; + +CREATE TABLE t_func_to_subcolumns_map (id UInt64, m Map(String, UInt64)) ENGINE = MergeTree ORDER BY id; + +INSERT INTO t_func_to_subcolumns_map VALUES (1, map('aaa', 1, 'bbb', 2)) (2, map('ccc', 3)); + +SET 
optimize_functions_to_subcolumns = 1; +SET allow_experimental_analyzer = 0; + +EXPLAIN SYNTAX SELECT length(m) FROM t_func_to_subcolumns_map; +SELECT length(m) FROM t_func_to_subcolumns_map; + +SET optimize_functions_to_subcolumns = 1; +SET allow_experimental_analyzer = 1; + +EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT length(m) FROM t_func_to_subcolumns_map; +SELECT length(m) FROM t_func_to_subcolumns_map; + +DROP TABLE t_func_to_subcolumns_map; From 368c99f1827acabd32a1ec11dba8711c627cdd53 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Tue, 23 Jan 2024 02:46:18 +0000 Subject: [PATCH 010/140] fix crash with analyzer --- ...egateFunctionsArithmericOperationsPass.cpp | 7 ++--- src/Analyzer/Passes/CountDistinctPass.cpp | 2 +- .../Passes/FunctionToSubcolumnsPass.cpp | 2 +- .../Passes/NormalizeCountVariantsPass.cpp | 4 +-- .../RewriteAggregateFunctionWithIfPass.cpp | 12 +++------ .../RewriteSumFunctionWithSumAndCountPass.cpp | 4 +-- src/Analyzer/Passes/SumIfToCountIfPass.cpp | 6 ++--- .../UniqInjectiveFunctionsEliminationPass.cpp | 10 +------ src/Analyzer/Passes/UniqToCountPass.cpp | 2 +- src/Analyzer/Utils.cpp | 26 ++++++------------- src/Analyzer/Utils.h | 2 +- 11 files changed, 26 insertions(+), 51 deletions(-) diff --git a/src/Analyzer/Passes/AggregateFunctionsArithmericOperationsPass.cpp b/src/Analyzer/Passes/AggregateFunctionsArithmericOperationsPass.cpp index b8a477b8523..a3d3b0ca13a 100644 --- a/src/Analyzer/Passes/AggregateFunctionsArithmericOperationsPass.cpp +++ b/src/Analyzer/Passes/AggregateFunctionsArithmericOperationsPass.cpp @@ -165,12 +165,9 @@ private: auto aggregate_function_clone = aggregate_function->clone(); auto & aggregate_function_clone_typed = aggregate_function_clone->as(); - aggregate_function_clone_typed.getArguments().getNodes() = { arithmetic_function_clone_argument }; - resolveAggregateFunctionNodeByName( - aggregate_function_clone_typed, - result_aggregate_function_name, - {arithmetic_function_clone_argument->getResultType()}); + aggregate_function_clone_typed.getArguments().getNodes() = { arithmetic_function_clone_argument }; + resolveAggregateFunctionNodeByName(aggregate_function_clone_typed, result_aggregate_function_name); arithmetic_function_clone_arguments_nodes[arithmetic_function_argument_index] = std::move(aggregate_function_clone); resolveOrdinaryFunctionNodeByName(arithmetic_function_clone_typed, arithmetic_function_clone_typed.getFunctionName(), getContext()); diff --git a/src/Analyzer/Passes/CountDistinctPass.cpp b/src/Analyzer/Passes/CountDistinctPass.cpp index a73ca4befcf..45d0301a0fe 100644 --- a/src/Analyzer/Passes/CountDistinctPass.cpp +++ b/src/Analyzer/Passes/CountDistinctPass.cpp @@ -80,7 +80,7 @@ public: auto result_type = function_node->getResultType(); function_node->getArguments().getNodes().clear(); - resolveAggregateFunctionNodeByName(*function_node, "count", {}); + resolveAggregateFunctionNodeByName(*function_node, "count"); } }; diff --git a/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp b/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp index ac13a505a52..de8b7753700 100644 --- a/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp +++ b/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp @@ -296,7 +296,7 @@ public: resolveOrdinaryFunctionNodeByName(*function_node_not, "not", getContext()); function_arguments_nodes = {std::move(function_node_not)}; - resolveAggregateFunctionNodeByName(*function_node, "sum", {column.type}); + resolveAggregateFunctionNodeByName(*function_node, "sum"); } else if (function_name == "isNull") { 
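For context, a minimal SQL sketch of the user-visible rewrites implemented by this series; the column types and the setting mirror the tests above, while the table name t_sketch is hypothetical:

CREATE TABLE t_sketch (arr Array(UInt64), n Nullable(String), m Map(String, UInt64))
ENGINE = MergeTree ORDER BY tuple();

SET optimize_functions_to_subcolumns = 1;

-- length(arr) -> arr.size0, isNull(n) -> n.null,
-- length(m) -> m.size0, mapContains(m, 'a') -> has(m.keys, 'a')
SELECT length(arr), isNull(n), length(m), mapContains(m, 'a') FROM t_sketch;

-- count(n) -> sum(not(n.null)), reading only the null map instead of the whole column
SELECT count(n) FROM t_sketch;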
diff --git a/src/Analyzer/Passes/NormalizeCountVariantsPass.cpp b/src/Analyzer/Passes/NormalizeCountVariantsPass.cpp index 6d9e6765608..1810158a2d7 100644 --- a/src/Analyzer/Passes/NormalizeCountVariantsPass.cpp +++ b/src/Analyzer/Passes/NormalizeCountVariantsPass.cpp @@ -43,14 +43,14 @@ public: if (function_node->getFunctionName() == "count" && !first_argument_constant_literal.isNull()) { function_node->getArguments().getNodes().clear(); - resolveAggregateFunctionNodeByName(*function_node, "count", {}); + resolveAggregateFunctionNodeByName(*function_node, "count"); } else if (function_node->getFunctionName() == "sum" && first_argument_constant_literal.getType() == Field::Types::UInt64 && first_argument_constant_literal.get() == 1) { function_node->getArguments().getNodes().clear(); - resolveAggregateFunctionNodeByName(*function_node, "count", {}); + resolveAggregateFunctionNodeByName(*function_node, "count"); } } }; diff --git a/src/Analyzer/Passes/RewriteAggregateFunctionWithIfPass.cpp b/src/Analyzer/Passes/RewriteAggregateFunctionWithIfPass.cpp index b8962e5a4c1..37eb3d98614 100644 --- a/src/Analyzer/Passes/RewriteAggregateFunctionWithIfPass.cpp +++ b/src/Analyzer/Passes/RewriteAggregateFunctionWithIfPass.cpp @@ -59,7 +59,7 @@ public: function_arguments_nodes.resize(2); function_arguments_nodes[0] = std::move(if_arguments_nodes[1]); function_arguments_nodes[1] = std::move(if_arguments_nodes[0]); - resolveAsAggregateFunctionWithIf(*function_node, function_arguments_nodes); + resolveAsAggregateFunctionWithIf(*function_node); } } else if (first_const_node) @@ -79,21 +79,17 @@ public: function_arguments_nodes.resize(2); function_arguments_nodes[0] = std::move(if_arguments_nodes[2]); function_arguments_nodes[1] = std::move(not_function); - resolveAsAggregateFunctionWithIf(*function_node, function_arguments_nodes); + resolveAsAggregateFunctionWithIf(*function_node); } } } private: - static inline void resolveAsAggregateFunctionWithIf(FunctionNode & function_node, const QueryTreeNodes & arguments) + static inline void resolveAsAggregateFunctionWithIf(FunctionNode & function_node) { auto result_type = function_node.getResultType(); auto suffix = result_type->isNullable() ? 
"OrNullIf" : "If"; - - resolveAggregateFunctionNodeByName( - function_node, - function_node.getFunctionName() + suffix, - {arguments[0]->getResultType(), arguments[1]->getResultType()}); + resolveAggregateFunctionNodeByName(function_node, function_node.getFunctionName() + suffix); } }; diff --git a/src/Analyzer/Passes/RewriteSumFunctionWithSumAndCountPass.cpp b/src/Analyzer/Passes/RewriteSumFunctionWithSumAndCountPass.cpp index 2f6674946a3..39f9e3b625b 100644 --- a/src/Analyzer/Passes/RewriteSumFunctionWithSumAndCountPass.cpp +++ b/src/Analyzer/Passes/RewriteSumFunctionWithSumAndCountPass.cpp @@ -78,11 +78,11 @@ public: const auto lhs = std::make_shared("sum"); lhs->getArguments().getNodes().push_back(func_plus_minus_nodes[column_id]); - resolveAggregateFunctionNodeByName(*lhs, lhs->getFunctionName(), {column_type}); + resolveAggregateFunctionNodeByName(*lhs, lhs->getFunctionName()); const auto rhs_count = std::make_shared("count"); rhs_count->getArguments().getNodes().push_back(func_plus_minus_nodes[column_id]); - resolveAggregateFunctionNodeByName(*rhs_count, rhs_count->getFunctionName(), {column_type}); + resolveAggregateFunctionNodeByName(*rhs_count, rhs_count->getFunctionName()); const auto rhs = std::make_shared("multiply"); rhs->getArguments().getNodes().push_back(func_plus_minus_nodes[literal_id]); diff --git a/src/Analyzer/Passes/SumIfToCountIfPass.cpp b/src/Analyzer/Passes/SumIfToCountIfPass.cpp index 78d5479843e..e072ba5ad48 100644 --- a/src/Analyzer/Passes/SumIfToCountIfPass.cpp +++ b/src/Analyzer/Passes/SumIfToCountIfPass.cpp @@ -67,7 +67,7 @@ public: function_node_arguments_nodes[0] = std::move(function_node_arguments_nodes[1]); function_node_arguments_nodes.resize(1); - resolveAggregateFunctionNodeByName(*function_node, "countIf", {function_node_arguments_nodes[0]->getResultType()}); + resolveAggregateFunctionNodeByName(*function_node, "countIf"); if (constant_value_literal.get() != 1) { @@ -115,7 +115,7 @@ public: function_node_arguments_nodes[0] = nested_if_function_arguments_nodes[0]; function_node_arguments_nodes.resize(1); - resolveAggregateFunctionNodeByName(*function_node, "countIf", {function_node_arguments_nodes[0]->getResultType()}); + resolveAggregateFunctionNodeByName(*function_node, "countIf"); if (if_true_condition_value != 1) { @@ -144,7 +144,7 @@ public: function_node_arguments_nodes[0] = std::move(not_function); function_node_arguments_nodes.resize(1); - resolveAggregateFunctionNodeByName(*function_node, "countIf", {function_node_arguments_nodes[0]->getResultType()}); + resolveAggregateFunctionNodeByName(*function_node, "countIf"); if (if_false_condition_value != 1) { diff --git a/src/Analyzer/Passes/UniqInjectiveFunctionsEliminationPass.cpp b/src/Analyzer/Passes/UniqInjectiveFunctionsEliminationPass.cpp index 610128a5754..1339fc07ac8 100644 --- a/src/Analyzer/Passes/UniqInjectiveFunctionsEliminationPass.cpp +++ b/src/Analyzer/Passes/UniqInjectiveFunctionsEliminationPass.cpp @@ -68,15 +68,7 @@ public: if (!replaced_argument) return; - const auto & function_node_argument_nodes = function_node->getArguments().getNodes(); - - DataTypes argument_types; - argument_types.reserve(function_node_argument_nodes.size()); - - for (const auto & function_node_argument : function_node_argument_nodes) - argument_types.emplace_back(function_node_argument->getResultType()); - - resolveAggregateFunctionNodeByName(*function_node, function_node->getFunctionName(), argument_types); + resolveAggregateFunctionNodeByName(*function_node, function_node->getFunctionName()); } }; diff 
--git a/src/Analyzer/Passes/UniqToCountPass.cpp b/src/Analyzer/Passes/UniqToCountPass.cpp index d5e4e011cfa..929c2731e5d 100644 --- a/src/Analyzer/Passes/UniqToCountPass.cpp +++ b/src/Analyzer/Passes/UniqToCountPass.cpp @@ -177,7 +177,7 @@ public: if (match_subquery_with_distinct() || match_subquery_with_group_by()) { function_node->getArguments().getNodes().clear(); - resolveAggregateFunctionNodeByName(*function_node, "count", {}); + resolveAggregateFunctionNodeByName(*function_node, "count"); } } }; diff --git a/src/Analyzer/Utils.cpp b/src/Analyzer/Utils.cpp index c193619a35f..efada8ef16a 100644 --- a/src/Analyzer/Utils.cpp +++ b/src/Analyzer/Utils.cpp @@ -528,16 +528,16 @@ private: bool has_function = false; }; -inline AggregateFunctionPtr resolveAggregateFunction(FunctionNode * function_node) +inline AggregateFunctionPtr resolveAggregateFunction(FunctionNode & function_node, const String & function_name) { Array parameters; - for (const auto & param : function_node->getParameters()) + for (const auto & param : function_node.getParameters()) { auto * constant = param->as(); parameters.push_back(constant->getValue()); } - const auto & function_node_argument_nodes = function_node->getArguments().getNodes(); + const auto & function_node_argument_nodes = function_node.getArguments().getNodes(); DataTypes argument_types; argument_types.reserve(function_node_argument_nodes.size()); @@ -547,7 +547,7 @@ inline AggregateFunctionPtr resolveAggregateFunction(FunctionNode * function_nod AggregateFunctionProperties properties; auto action = NullsAction::EMPTY; - return AggregateFunctionFactory::instance().get(function_node->getFunctionName(), action, argument_types, parameters, properties); + return AggregateFunctionFactory::instance().get(function_name, action, argument_types, parameters, properties); } } @@ -628,11 +628,11 @@ void rerunFunctionResolve(FunctionNode * function_node, ContextPtr context) { if (name == "nothing") return; - function_node->resolveAsAggregateFunction(resolveAggregateFunction(function_node)); + function_node->resolveAsAggregateFunction(resolveAggregateFunction(*function_node, function_node->getFunctionName())); } else if (function_node->isWindowFunction()) { - function_node->resolveAsWindowFunction(resolveAggregateFunction(function_node)); + function_node->resolveAsWindowFunction(resolveAggregateFunction(*function_node, function_node->getFunctionName())); } } @@ -691,19 +691,9 @@ void resolveOrdinaryFunctionNodeByName(FunctionNode & function_node, const Strin function_node.resolveAsFunction(function->build(function_node.getArgumentColumns())); } -void resolveAggregateFunctionNodeByName(FunctionNode & function_node, const String & function_name, const DataTypes & argument_types) +void resolveAggregateFunctionNodeByName(FunctionNode & function_node, const String & function_name) { - chassert(function_node.isAggregateFunction()); - auto old_aggregate_function = function_node.getAggregateFunction(); - - AggregateFunctionProperties properties; - auto aggregate_function = AggregateFunctionFactory::instance().get( - function_name, - function_node.getNullsAction(), - argument_types, - old_aggregate_function->getParameters(), - properties); - + auto aggregate_function = resolveAggregateFunction(function_node, function_name); function_node.resolveAsAggregateFunction(std::move(aggregate_function)); } diff --git a/src/Analyzer/Utils.h b/src/Analyzer/Utils.h index 60f32d6b267..75d874c1736 100644 --- a/src/Analyzer/Utils.h +++ b/src/Analyzer/Utils.h @@ -108,6 +108,6 @@ void 
resolveOrdinaryFunctionNodeByName(FunctionNode & function_node, const Strin /// Resolves function node as aggregate function with given name. /// Arguments and parameters are taken from the node. -void resolveAggregateFunctionNodeByName(FunctionNode & function_node, const String & function_name, const DataTypes & argument_types); +void resolveAggregateFunctionNodeByName(FunctionNode & function_node, const String & function_name); } From 7d62e224b50d10188b9876684a8a564b5965347c Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Mon, 5 Feb 2024 15:31:42 +0100 Subject: [PATCH 011/140] Pass correct Context --- .../Passes/ComparisonTupleEliminationPass.cpp | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/src/Analyzer/Passes/ComparisonTupleEliminationPass.cpp b/src/Analyzer/Passes/ComparisonTupleEliminationPass.cpp index 42b53f667b4..88da37f014b 100644 --- a/src/Analyzer/Passes/ComparisonTupleEliminationPass.cpp +++ b/src/Analyzer/Passes/ComparisonTupleEliminationPass.cpp @@ -19,19 +19,18 @@ namespace DB namespace { -class ComparisonTupleEliminationPassVisitor : public InDepthQueryTreeVisitor +class ComparisonTupleEliminationPassVisitor : public InDepthQueryTreeVisitorWithContext { public: - explicit ComparisonTupleEliminationPassVisitor(ContextPtr context_) - : context(std::move(context_)) - {} + using Base = InDepthQueryTreeVisitorWithContext; + using Base::Base; static bool needChildVisit(QueryTreeNodePtr &, QueryTreeNodePtr & child) { return child->getNodeType() != QueryTreeNodeType::TABLE_FUNCTION; } - void visitImpl(QueryTreeNodePtr & node) const + void enterImpl(QueryTreeNodePtr & node) const { auto * function_node = node->as(); if (!function_node) @@ -172,13 +171,13 @@ private: { auto result_function = std::make_shared("and"); result_function->getArguments().getNodes() = std::move(tuple_arguments_equals_functions); - resolveOrdinaryFunctionNodeByName(*result_function, result_function->getFunctionName(), context); + resolveOrdinaryFunctionNodeByName(*result_function, result_function->getFunctionName(), getContext()); if (comparison_function_name == "notEquals") { auto not_function = std::make_shared("not"); not_function->getArguments().getNodes().push_back(std::move(result_function)); - resolveOrdinaryFunctionNodeByName(*not_function, not_function->getFunctionName(), context); + resolveOrdinaryFunctionNodeByName(*not_function, not_function->getFunctionName(), getContext()); result_function = std::move(not_function); } @@ -198,12 +197,10 @@ private: comparison_function->getArguments().getNodes().push_back(std::move(lhs_argument)); comparison_function->getArguments().getNodes().push_back(std::move(rhs_argument)); - resolveOrdinaryFunctionNodeByName(*comparison_function, comparison_function->getFunctionName(), context); + resolveOrdinaryFunctionNodeByName(*comparison_function, comparison_function->getFunctionName(), getContext()); return comparison_function; } - - ContextPtr context; }; } From bac29c0bbafb7a97e033ca08bdc12a9d13914c15 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Tue, 6 Feb 2024 15:52:19 +0000 Subject: [PATCH 012/140] add test for variant subcolumn --- .../RewriteFunctionToSubcolumnVisitor.cpp | 6 ++++++ ..._functions_to_subcolumns_variant.reference | 8 +++++++ .../02971_functions_to_subcolumns_variant.sql | 21 +++++++++++++++++++ 3 files changed, 35 insertions(+) create mode 100644 tests/queries/0_stateless/02971_functions_to_subcolumns_variant.reference create mode 100644 
tests/queries/0_stateless/02971_functions_to_subcolumns_variant.sql diff --git a/src/Interpreters/RewriteFunctionToSubcolumnVisitor.cpp b/src/Interpreters/RewriteFunctionToSubcolumnVisitor.cpp index 04451947796..437d46c24b2 100644 --- a/src/Interpreters/RewriteFunctionToSubcolumnVisitor.cpp +++ b/src/Interpreters/RewriteFunctionToSubcolumnVisitor.cpp @@ -124,6 +124,12 @@ void RewriteFunctionToSubcolumnFirstPassMatcher::visit(const ASTFunction & funct if (value_type == Field::Types::UInt64 || value_type == Field::Types::String) ++data.optimized_identifiers_count[column->name]; } + else if (function.name == "variantElement" && column_type_id == TypeIndex::Variant) + { + const auto * literal = arguments[1]->as(); + if (literal && literal->value.getType() == Field::Types::String) + ++data.optimized_identifiers_count[column->name]; + } else if (function.name == "mapContains" && column_type_id == TypeIndex::Map) { ++data.optimized_identifiers_count[column->name]; diff --git a/tests/queries/0_stateless/02971_functions_to_subcolumns_variant.reference b/tests/queries/0_stateless/02971_functions_to_subcolumns_variant.reference new file mode 100644 index 00000000000..7a52155fc2d --- /dev/null +++ b/tests/queries/0_stateless/02971_functions_to_subcolumns_variant.reference @@ -0,0 +1,8 @@ +SELECT `v.String` AS `variantElement(v, \'String\')` +FROM t_func_to_subcolumns_variant +foo +\N +SELECT __table1.`v.String` AS `variantElement(v, \'String\')` +FROM default.t_func_to_subcolumns_variant AS __table1 +foo +\N diff --git a/tests/queries/0_stateless/02971_functions_to_subcolumns_variant.sql b/tests/queries/0_stateless/02971_functions_to_subcolumns_variant.sql new file mode 100644 index 00000000000..1cedd877289 --- /dev/null +++ b/tests/queries/0_stateless/02971_functions_to_subcolumns_variant.sql @@ -0,0 +1,21 @@ +DROP TABLE IF EXISTS t_func_to_subcolumns_variant; + +SET allow_experimental_variant_type = 1; + +CREATE TABLE t_func_to_subcolumns_variant (id UInt64, v Variant(String, UInt64)) ENGINE = MergeTree ORDER BY id; + +INSERT INTO t_func_to_subcolumns_variant VALUES (1, 'foo') (2, 111); + +SET optimize_functions_to_subcolumns = 1; +SET allow_experimental_analyzer = 0; + +EXPLAIN SYNTAX SELECT variantElement(v, 'String') FROM t_func_to_subcolumns_variant; +SELECT variantElement(v, 'String') FROM t_func_to_subcolumns_variant; + +SET optimize_functions_to_subcolumns = 1; +SET allow_experimental_analyzer = 1; + +EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT variantElement(v, 'String') FROM t_func_to_subcolumns_variant; +SELECT variantElement(v, 'String') FROM t_func_to_subcolumns_variant; + +DROP TABLE t_func_to_subcolumns_variant; From 46f6867896acec7ee52a25a1e215e78a1eb9365d Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Tue, 6 Feb 2024 17:37:02 +0000 Subject: [PATCH 013/140] refactor FunctionToSubcolumnsPass --- .../Passes/FunctionToSubcolumnsPass.cpp | 399 ++++++++---------- ...2971_functions_to_subcolumns_map.reference | 16 + .../02971_functions_to_subcolumns_map.sql | 12 + 3 files changed, 210 insertions(+), 217 deletions(-) diff --git a/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp b/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp index 877b0ef7232..f0392a0d9d4 100644 --- a/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp +++ b/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include @@ -25,6 +26,181 @@ namespace DB namespace { +void optimizeFunctionLength(QueryTreeNodePtr & node, FunctionNode &, ColumnNode & column_node, 
ContextPtr) +{ + /// Replace `length(argument)` with `argument.size0` + /// `argument` may be Array or Map. + + NameAndTypePair column{column_node.getColumnName() + ".size0", std::make_shared()}; + node = std::make_shared(column, column_node.getColumnSource()); +} + +template +void optimizeFunctionEmpty(QueryTreeNodePtr &, FunctionNode & function_node, ColumnNode & column_node, ContextPtr context) +{ + /// Replace `empty(argument)` with `equals(argument.size0, 0)` if positive + /// Replace `notEmpty(argument)` with `notEquals(argument.size0, 0)` if not positive + /// `argument` may be Array or Map. + + NameAndTypePair column{column_node.getColumnName() + ".size0", std::make_shared()}; + auto & function_arguments_nodes = function_node.getArguments().getNodes(); + + function_arguments_nodes.clear(); + function_arguments_nodes.push_back(std::make_shared(column, column_node.getColumnSource())); + function_arguments_nodes.push_back(std::make_shared(static_cast(0))); + + auto function_name = positive ? "equals" : "notEquals"; + resolveOrdinaryFunctionNodeByName(function_node, function_name, std::move(context)); +} + +String getSubcolumnNameForElement(const Field & value, const DataTypeTuple & data_type_tuple) +{ + if (value.getType() == Field::Types::String) + return value.get(); + + if (value.getType() == Field::Types::UInt64) + return data_type_tuple.getNameByPosition(value.get()); + + return ""; +} + +String getSubcolumnNameForElement(const Field & value, const DataTypeVariant &) +{ + if (value.getType() == Field::Types::String) + return value.get(); + + return ""; +} + +template +void optimizeTupleOrVariantElement(QueryTreeNodePtr & node, FunctionNode & function_node, ColumnNode & column_node, ContextPtr) +{ + /// Replace `tupleElement(tuple_argument, string_literal)`, `tupleElement(tuple_argument, integer_literal)` with `tuple_argument.column_name`. + /// Replace `variantElement(variant_argument, string_literal)` with `variant_argument.column_name`. + + auto & function_arguments_nodes = function_node.getArguments().getNodes(); + if (function_arguments_nodes.size() != 2) + return; + + const auto * second_argument_constant_node = function_arguments_nodes[1]->as(); + if (!second_argument_constant_node) + return; + + auto column_type = column_node.getColumnType(); + const auto & data_type_concrete = assert_cast(*column_type); + + auto subcolumn_name = getSubcolumnNameForElement(second_argument_constant_node->getValue(), data_type_concrete); + if (subcolumn_name.empty()) + return; + + NameAndTypePair column{column_node.getColumnName() + "." 
+ subcolumn_name, function_node.getResultType()}; + node = std::make_shared(column, column_node.getColumnSource()); +} + +using NodeToSubcolumnTransformer = std::function; + +std::map, NodeToSubcolumnTransformer> node_transformers = +{ + { + {TypeIndex::Array, "length"}, optimizeFunctionLength, + }, + { + {TypeIndex::Array, "empty"}, optimizeFunctionEmpty, + }, + { + {TypeIndex::Array, "notEmpty"}, optimizeFunctionEmpty, + }, + { + {TypeIndex::Map, "length"}, optimizeFunctionLength, + }, + { + {TypeIndex::Map, "empty"}, optimizeFunctionEmpty, + }, + { + {TypeIndex::Map, "notEmpty"}, optimizeFunctionEmpty, + }, + { + {TypeIndex::Map, "mapKeys"}, + [](QueryTreeNodePtr & node, FunctionNode & function_node, ColumnNode & column_node, ContextPtr) + { + /// Replace `mapKeys(map_argument)` with `map_argument.keys` + NameAndTypePair column{column_node.getColumnName() + ".keys", function_node.getResultType()}; + node = std::make_shared(column, column_node.getColumnSource()); + }, + }, + { + {TypeIndex::Map, "mapValues"}, + [](QueryTreeNodePtr & node, FunctionNode & function_node, ColumnNode & column_node, ContextPtr) + { + /// Replace `mapValues(map_argument)` with `map_argument.values` + NameAndTypePair column{column_node.getColumnName() + ".values", function_node.getResultType()}; + node = std::make_shared(column, column_node.getColumnSource()); + }, + }, + { + {TypeIndex::Map, "mapContains"}, + [](QueryTreeNodePtr &, FunctionNode & function_node, ColumnNode & column_node, ContextPtr context) + { + /// Replace `mapContains(map_argument, argument)` with `has(map_argument.keys, argument)` + auto column_type = column_node.getColumnType(); + const auto & data_type_map = assert_cast(*column_type); + + NameAndTypePair column{column_node.getColumnName() + ".keys", std::make_shared(data_type_map.getKeyType())}; + auto & function_arguments_nodes = function_node.getArguments().getNodes(); + + auto has_function_argument = std::make_shared(column, column_node.getColumnSource()); + function_arguments_nodes[0] = std::move(has_function_argument); + + resolveOrdinaryFunctionNodeByName(function_node, "has", context); + }, + }, + { + {TypeIndex::Nullable, "count"}, + [](QueryTreeNodePtr &, FunctionNode & function_node, ColumnNode & column_node, ContextPtr context) + { + /// Replace `count(nullable_argument)` with `sum(not(nullable_argument.null))` + NameAndTypePair column{column_node.getColumnName() + ".null", std::make_shared()}; + auto & function_arguments_nodes = function_node.getArguments().getNodes(); + + auto new_column_node = std::make_shared(column, column_node.getColumnSource()); + auto function_node_not = std::make_shared("not"); + + function_node_not->getArguments().getNodes().push_back(std::move(new_column_node)); + resolveOrdinaryFunctionNodeByName(*function_node_not, "not", context); + + function_arguments_nodes = {std::move(function_node_not)}; + resolveAggregateFunctionNodeByName(function_node, "sum"); + }, + }, + { + {TypeIndex::Nullable, "isNull"}, + [](QueryTreeNodePtr & node, FunctionNode &, ColumnNode & column_node, ContextPtr) + { + /// Replace `isNull(nullable_argument)` with `nullable_argument.null` + NameAndTypePair column{column_node.getColumnName() + ".null", std::make_shared()}; + node = std::make_shared(column, column_node.getColumnSource()); + }, + }, + { + {TypeIndex::Nullable, "isNotNull"}, + [](QueryTreeNodePtr &, FunctionNode & function_node, ColumnNode & column_node, ContextPtr context) + { + /// Replace `isNotNull(nullable_argument)` with `not(nullable_argument.null)` + 
NameAndTypePair column{column_node.getColumnName() + ".null", std::make_shared()}; + auto & function_arguments_nodes = function_node.getArguments().getNodes(); + + function_arguments_nodes = {std::make_shared(column, column_node.getColumnSource())}; + resolveOrdinaryFunctionNodeByName(function_node, "not", context); + }, + }, + { + {TypeIndex::Tuple, "tupleElement"}, optimizeTupleOrVariantElement, + }, + { + {TypeIndex::Variant, "variantElement"}, optimizeTupleOrVariantElement, + }, +}; + std::tuple getTypedNodesForOptimization(const QueryTreeNodePtr & node) { auto * function_node = node->as(); @@ -161,54 +337,13 @@ private: void enterImpl(const FunctionNode & function_node, const ColumnNode & first_argument_column_node, const TableNode & table_node) { - const auto & function_arguments_nodes = function_node.getArguments().getNodes(); - const auto & function_name = function_node.getFunctionName(); - auto column = first_argument_column_node.getColumn(); - WhichDataType column_type(column.type); - auto table_name = table_node.getStorage()->getStorageID().getFullTableName(); + Identifier qualified_name({table_name, column.name}); - if (function_arguments_nodes.size() == 1) - { - if (column_type.isArray()) - { - if (function_name == "length" || function_name == "empty" || function_name == "notEmpty") - ++data.optimized_identifiers_count[qualified_name]; - } - else if (column_type.isNullable()) - { - if (function_name == "count" || function_name == "isNull" || function_name == "isNotNull") - ++data.optimized_identifiers_count[qualified_name]; - } - else if (column_type.isMap()) - { - if (function_name == "length" || function_name == "mapKeys" || function_name == "mapValues") - ++data.optimized_identifiers_count[qualified_name]; - } - } - else if (function_arguments_nodes.size() == 2) - { - const auto * second_argument_constant_node = function_arguments_nodes[1]->as(); - if (function_name == "tupleElement" && column_type.isTuple() && second_argument_constant_node) - { - const auto & constant_value = second_argument_constant_node->getValue(); - const auto & constant_value_type = constant_value.getType(); - - if (constant_value_type == Field::Types::String || constant_value_type == Field::Types::UInt64) - ++data.optimized_identifiers_count[qualified_name]; - } - else if (function_name == "variantElement" && column_type.isVariant() && second_argument_constant_node) - { - if (second_argument_constant_node->getValue().getType() == Field::Types::String) - ++data.optimized_identifiers_count[qualified_name]; - } - else if (function_name == "mapContains" && column_type.isMap()) - { - ++data.optimized_identifiers_count[qualified_name]; - } - } + if (node_transformers.contains({column.type->getTypeId(), function_node.getFunctionName()})) + ++data.optimized_identifiers_count[qualified_name]; } }; @@ -236,9 +371,6 @@ public: if (!function_node || !first_argument_column_node || !table_node) return; - auto & function_arguments_nodes = function_node->getArguments().getNodes(); - const auto & function_name = function_node->getFunctionName(); - auto column = first_argument_column_node->getColumn(); auto table_name = table_node->getStorage()->getStorageID().getFullTableName(); @@ -246,176 +378,9 @@ public: if (!identifiers_to_optimize.contains(qualified_name)) return; - auto column_source = first_argument_column_node->getColumnSource(); - WhichDataType column_type(column.type); - - if (function_arguments_nodes.size() == 1) - { - if (column_type.isArray()) - { - if (function_name == "length") - { - /// Replace 
`length(array_argument)` with `array_argument.size0` - column.name += ".size0"; - column.type = std::make_shared(); - - node = std::make_shared(column, column_source); - } - else if (function_name == "empty") - { - /// Replace `empty(array_argument)` with `equals(array_argument.size0, 0)` - column.name += ".size0"; - column.type = std::make_shared(); - - function_arguments_nodes.clear(); - function_arguments_nodes.push_back(std::make_shared(column, column_source)); - function_arguments_nodes.push_back(std::make_shared(static_cast(0))); - - resolveOrdinaryFunctionNodeByName(*function_node, "equals", getContext()); - } - else if (function_name == "notEmpty") - { - /// Replace `notEmpty(array_argument)` with `notEquals(array_argument.size0, 0)` - column.name += ".size0"; - column.type = std::make_shared(); - - function_arguments_nodes.clear(); - function_arguments_nodes.push_back(std::make_shared(column, column_source)); - function_arguments_nodes.push_back(std::make_shared(static_cast(0))); - - resolveOrdinaryFunctionNodeByName(*function_node, "notEquals", getContext()); - } - } - else if (column_type.isNullable()) - { - if (function_name == "count") - { - /// Replace `count(nullable_argument)` with `sum(not(nullable_argument.null))` - column.name += ".null"; - column.type = std::make_shared(); - - auto column_node = std::make_shared(column, column_source); - auto function_node_not = std::make_shared("not"); - - function_node_not->getArguments().getNodes().push_back(std::move(column_node)); - resolveOrdinaryFunctionNodeByName(*function_node_not, "not", getContext()); - - function_arguments_nodes = {std::move(function_node_not)}; - resolveAggregateFunctionNodeByName(*function_node, "sum"); - } - else if (function_name == "isNull") - { - /// Replace `isNull(nullable_argument)` with `nullable_argument.null` - column.name += ".null"; - column.type = std::make_shared(); - - node = std::make_shared(column, column_source); - } - else if (function_name == "isNotNull") - { - /// Replace `isNotNull(nullable_argument)` with `not(nullable_argument.null)` - column.name += ".null"; - column.type = std::make_shared(); - - function_arguments_nodes = {std::make_shared(column, column_source)}; - - resolveOrdinaryFunctionNodeByName(*function_node, "not", getContext()); - } - } - else if (column_type.isMap()) - { - if (function_name == "length") - { - /// Replace `length(map_argument)` with `map_argument.size0` - column.name += ".size0"; - column.type = std::make_shared(); - - node = std::make_shared(column, column_source); - } - else if (function_name == "mapKeys") - { - /// Replace `mapKeys(map_argument)` with `map_argument.keys` - column.name += ".keys"; - column.type = function_node->getResultType(); - - node = std::make_shared(column, column_source); - } - else if (function_name == "mapValues") - { - /// Replace `mapValues(map_argument)` with `map_argument.values` - column.name += ".values"; - column.type = function_node->getResultType(); - - node = std::make_shared(column, column_source); - } - } - } - else if (function_arguments_nodes.size() == 2) - { - const auto * second_argument_constant_node = function_arguments_nodes[1]->as(); - if (function_name == "tupleElement" && column_type.isTuple() && second_argument_constant_node) - { - /** Replace `tupleElement(tuple_argument, string_literal)`, `tupleElement(tuple_argument, integer_literal)` - * with `tuple_argument.column_name`. 
- */ - const auto & tuple_element_constant_value = second_argument_constant_node->getValue(); - const auto & tuple_element_constant_value_type = tuple_element_constant_value.getType(); - - const auto & data_type_tuple = assert_cast(*column.type); - - String subcolumn_name; - - if (tuple_element_constant_value_type == Field::Types::String) - { - subcolumn_name = tuple_element_constant_value.get(); - } - else if (tuple_element_constant_value_type == Field::Types::UInt64) - { - auto tuple_column_index = tuple_element_constant_value.get(); - subcolumn_name = data_type_tuple.getNameByPosition(tuple_column_index); - } - else - { - return; - } - - column.name += '.'; - column.name += subcolumn_name; - column.type = function_node->getResultType(); - - node = std::make_shared(column, column_source); - } - else if (function_name == "variantElement" && isVariant(column_type) && second_argument_constant_node) - { - /// Replace `variantElement(variant_argument, type_name)` with `variant_argument.type_name`. - const auto & variant_element_constant_value = second_argument_constant_node->getValue(); - String subcolumn_name; - - if (variant_element_constant_value.getType() != Field::Types::String) - return; - - subcolumn_name = variant_element_constant_value.get(); - - column.name += '.'; - column.name += subcolumn_name; - column.type = function_node->getResultType(); - - node = std::make_shared(column, column_source); - } - else if (function_name == "mapContains" && column_type.isMap()) - { - const auto & data_type_map = assert_cast(*column.type); - - /// Replace `mapContains(map_argument, argument)` with `has(map_argument.keys, argument)` - column.name += ".keys"; - column.type = std::make_shared(data_type_map.getKeyType()); - - auto has_function_argument = std::make_shared(column, column_source); - function_arguments_nodes[0] = std::move(has_function_argument); - - resolveOrdinaryFunctionNodeByName(*function_node, "has", getContext()); - } - } + auto transformer_it = node_transformers.find({column.type->getTypeId(), function_node->getFunctionName()}); + if (transformer_it != node_transformers.end()) + transformer_it->second(node, *function_node, *first_argument_column_node, getContext()); } }; diff --git a/tests/queries/0_stateless/02971_functions_to_subcolumns_map.reference b/tests/queries/0_stateless/02971_functions_to_subcolumns_map.reference index 90596ce1000..50f21842ac1 100644 --- a/tests/queries/0_stateless/02971_functions_to_subcolumns_map.reference +++ b/tests/queries/0_stateless/02971_functions_to_subcolumns_map.reference @@ -2,7 +2,23 @@ SELECT `m.size0` AS `length(m)` FROM t_func_to_subcolumns_map 2 1 +SELECT `m.size0` = 0 AS `empty(m)` +FROM t_func_to_subcolumns_map +0 +0 +SELECT `m.size0` != 0 AS `notEmpty(m)` +FROM t_func_to_subcolumns_map +1 +1 SELECT __table1.`m.size0` AS `length(m)` FROM default.t_func_to_subcolumns_map AS __table1 2 1 +SELECT __table1.`m.size0` = 0 AS `empty(m)` +FROM default.t_func_to_subcolumns_map AS __table1 +0 +0 +SELECT __table1.`m.size0` != 0 AS `notEmpty(m)` +FROM default.t_func_to_subcolumns_map AS __table1 +1 +1 diff --git a/tests/queries/0_stateless/02971_functions_to_subcolumns_map.sql b/tests/queries/0_stateless/02971_functions_to_subcolumns_map.sql index b5687696b43..c574e1033c0 100644 --- a/tests/queries/0_stateless/02971_functions_to_subcolumns_map.sql +++ b/tests/queries/0_stateless/02971_functions_to_subcolumns_map.sql @@ -10,10 +10,22 @@ SET allow_experimental_analyzer = 0; EXPLAIN SYNTAX SELECT length(m) FROM t_func_to_subcolumns_map; SELECT 
length(m) FROM t_func_to_subcolumns_map; +EXPLAIN SYNTAX SELECT empty(m) FROM t_func_to_subcolumns_map; +SELECT empty(m) FROM t_func_to_subcolumns_map; + +EXPLAIN SYNTAX SELECT notEmpty(m) FROM t_func_to_subcolumns_map; +SELECT notEmpty(m) FROM t_func_to_subcolumns_map; + SET optimize_functions_to_subcolumns = 1; SET allow_experimental_analyzer = 1; EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT length(m) FROM t_func_to_subcolumns_map; SELECT length(m) FROM t_func_to_subcolumns_map; +EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT empty(m) FROM t_func_to_subcolumns_map; +SELECT empty(m) FROM t_func_to_subcolumns_map; + +EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT notEmpty(m) FROM t_func_to_subcolumns_map; +SELECT notEmpty(m) FROM t_func_to_subcolumns_map; + DROP TABLE t_func_to_subcolumns_map; From 361b5a20771b17305b7d11e626c6f9eba9b77fe3 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Wed, 7 Feb 2024 18:19:15 +0000 Subject: [PATCH 014/140] more refactoring of FunctionToSubcolumnsPass --- .../Passes/FunctionToSubcolumnsPass.cpp | 86 ++++++++++--------- 1 file changed, 47 insertions(+), 39 deletions(-) diff --git a/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp b/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp index f0392a0d9d4..954ae6df13e 100644 --- a/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp +++ b/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp @@ -26,31 +26,40 @@ namespace DB namespace { -void optimizeFunctionLength(QueryTreeNodePtr & node, FunctionNode &, ColumnNode & column_node, ContextPtr) +struct ColumnContext +{ + NameAndTypePair column; + QueryTreeNodePtr column_source; + ContextPtr context; +}; + +using NodeToSubcolumnTransformer = std::function; + +void optimizeFunctionLength(QueryTreeNodePtr & node, FunctionNode &, ColumnContext & ctx) { /// Replace `length(argument)` with `argument.size0` /// `argument` may be Array or Map. - NameAndTypePair column{column_node.getColumnName() + ".size0", std::make_shared()}; - node = std::make_shared(column, column_node.getColumnSource()); + NameAndTypePair column{ctx.column.name + ".size0", std::make_shared()}; + node = std::make_shared(column, ctx.column_source); } template -void optimizeFunctionEmpty(QueryTreeNodePtr &, FunctionNode & function_node, ColumnNode & column_node, ContextPtr context) +void optimizeFunctionEmpty(QueryTreeNodePtr &, FunctionNode & function_node, ColumnContext & ctx) { /// Replace `empty(argument)` with `equals(argument.size0, 0)` if positive /// Replace `notEmpty(argument)` with `notEquals(argument.size0, 0)` if not positive /// `argument` may be Array or Map. - NameAndTypePair column{column_node.getColumnName() + ".size0", std::make_shared()}; + NameAndTypePair column{ctx.column.name + ".size0", std::make_shared()}; auto & function_arguments_nodes = function_node.getArguments().getNodes(); function_arguments_nodes.clear(); - function_arguments_nodes.push_back(std::make_shared(column, column_node.getColumnSource())); + function_arguments_nodes.push_back(std::make_shared(column, ctx.column_source)); function_arguments_nodes.push_back(std::make_shared(static_cast(0))); auto function_name = positive ? 
"equals" : "notEquals"; - resolveOrdinaryFunctionNodeByName(function_node, function_name, std::move(context)); + resolveOrdinaryFunctionNodeByName(function_node, function_name, std::move(ctx.context)); } String getSubcolumnNameForElement(const Field & value, const DataTypeTuple & data_type_tuple) @@ -73,7 +82,7 @@ String getSubcolumnNameForElement(const Field & value, const DataTypeVariant &) } template -void optimizeTupleOrVariantElement(QueryTreeNodePtr & node, FunctionNode & function_node, ColumnNode & column_node, ContextPtr) +void optimizeTupleOrVariantElement(QueryTreeNodePtr & node, FunctionNode & function_node, ColumnContext & ctx) { /// Replace `tupleElement(tuple_argument, string_literal)`, `tupleElement(tuple_argument, integer_literal)` with `tuple_argument.column_name`. /// Replace `variantElement(variant_argument, string_literal)` with `variant_argument.column_name`. @@ -86,19 +95,16 @@ void optimizeTupleOrVariantElement(QueryTreeNodePtr & node, FunctionNode & funct if (!second_argument_constant_node) return; - auto column_type = column_node.getColumnType(); - const auto & data_type_concrete = assert_cast(*column_type); - + const auto & data_type_concrete = assert_cast(*ctx.column.type); auto subcolumn_name = getSubcolumnNameForElement(second_argument_constant_node->getValue(), data_type_concrete); + if (subcolumn_name.empty()) return; - NameAndTypePair column{column_node.getColumnName() + "." + subcolumn_name, function_node.getResultType()}; - node = std::make_shared(column, column_node.getColumnSource()); + NameAndTypePair column{ctx.column.name + "." + subcolumn_name, function_node.getResultType()}; + node = std::make_shared(column, ctx.column_source); } -using NodeToSubcolumnTransformer = std::function; - std::map, NodeToSubcolumnTransformer> node_transformers = { { @@ -121,52 +127,51 @@ std::map, NodeToSubcolumnTransformer> node_transfor }, { {TypeIndex::Map, "mapKeys"}, - [](QueryTreeNodePtr & node, FunctionNode & function_node, ColumnNode & column_node, ContextPtr) + [](QueryTreeNodePtr & node, FunctionNode & function_node, ColumnContext & ctx) { /// Replace `mapKeys(map_argument)` with `map_argument.keys` - NameAndTypePair column{column_node.getColumnName() + ".keys", function_node.getResultType()}; - node = std::make_shared(column, column_node.getColumnSource()); + NameAndTypePair column{ctx.column.name + ".keys", function_node.getResultType()}; + node = std::make_shared(column, ctx.column_source); }, }, { {TypeIndex::Map, "mapValues"}, - [](QueryTreeNodePtr & node, FunctionNode & function_node, ColumnNode & column_node, ContextPtr) + [](QueryTreeNodePtr & node, FunctionNode & function_node, ColumnContext & ctx) { /// Replace `mapValues(map_argument)` with `map_argument.values` - NameAndTypePair column{column_node.getColumnName() + ".values", function_node.getResultType()}; - node = std::make_shared(column, column_node.getColumnSource()); + NameAndTypePair column{ctx.column.name + ".values", function_node.getResultType()}; + node = std::make_shared(column, ctx.column_source); }, }, { {TypeIndex::Map, "mapContains"}, - [](QueryTreeNodePtr &, FunctionNode & function_node, ColumnNode & column_node, ContextPtr context) + [](QueryTreeNodePtr &, FunctionNode & function_node, ColumnContext & ctx) { /// Replace `mapContains(map_argument, argument)` with `has(map_argument.keys, argument)` - auto column_type = column_node.getColumnType(); - const auto & data_type_map = assert_cast(*column_type); + const auto & data_type_map = assert_cast(*ctx.column.type); - NameAndTypePair 
column{column_node.getColumnName() + ".keys", std::make_shared(data_type_map.getKeyType())}; + NameAndTypePair column{ctx.column.name + ".keys", std::make_shared(data_type_map.getKeyType())}; auto & function_arguments_nodes = function_node.getArguments().getNodes(); - auto has_function_argument = std::make_shared(column, column_node.getColumnSource()); + auto has_function_argument = std::make_shared(column, ctx.column_source); function_arguments_nodes[0] = std::move(has_function_argument); - resolveOrdinaryFunctionNodeByName(function_node, "has", context); + resolveOrdinaryFunctionNodeByName(function_node, "has", ctx.context); }, }, { {TypeIndex::Nullable, "count"}, - [](QueryTreeNodePtr &, FunctionNode & function_node, ColumnNode & column_node, ContextPtr context) + [](QueryTreeNodePtr &, FunctionNode & function_node, ColumnContext & ctx) { /// Replace `count(nullable_argument)` with `sum(not(nullable_argument.null))` - NameAndTypePair column{column_node.getColumnName() + ".null", std::make_shared()}; + NameAndTypePair column{ctx.column.name + ".null", std::make_shared()}; auto & function_arguments_nodes = function_node.getArguments().getNodes(); - auto new_column_node = std::make_shared(column, column_node.getColumnSource()); + auto new_column_node = std::make_shared(column, ctx.column_source); auto function_node_not = std::make_shared("not"); function_node_not->getArguments().getNodes().push_back(std::move(new_column_node)); - resolveOrdinaryFunctionNodeByName(*function_node_not, "not", context); + resolveOrdinaryFunctionNodeByName(*function_node_not, "not", ctx.context); function_arguments_nodes = {std::move(function_node_not)}; resolveAggregateFunctionNodeByName(function_node, "sum"); @@ -174,23 +179,23 @@ std::map, NodeToSubcolumnTransformer> node_transfor }, { {TypeIndex::Nullable, "isNull"}, - [](QueryTreeNodePtr & node, FunctionNode &, ColumnNode & column_node, ContextPtr) + [](QueryTreeNodePtr & node, FunctionNode &, ColumnContext & ctx) { /// Replace `isNull(nullable_argument)` with `nullable_argument.null` - NameAndTypePair column{column_node.getColumnName() + ".null", std::make_shared()}; - node = std::make_shared(column, column_node.getColumnSource()); + NameAndTypePair column{ctx.column.name + ".null", std::make_shared()}; + node = std::make_shared(column, ctx.column_source); }, }, { {TypeIndex::Nullable, "isNotNull"}, - [](QueryTreeNodePtr &, FunctionNode & function_node, ColumnNode & column_node, ContextPtr context) + [](QueryTreeNodePtr &, FunctionNode & function_node, ColumnContext & ctx) { /// Replace `isNotNull(nullable_argument)` with `not(nullable_argument.null)` - NameAndTypePair column{column_node.getColumnName() + ".null", std::make_shared()}; + NameAndTypePair column{ctx.column.name + ".null", std::make_shared()}; auto & function_arguments_nodes = function_node.getArguments().getNodes(); - function_arguments_nodes = {std::make_shared(column, column_node.getColumnSource())}; - resolveOrdinaryFunctionNodeByName(function_node, "not", context); + function_arguments_nodes = {std::make_shared(column, ctx.column_source)}; + resolveOrdinaryFunctionNodeByName(function_node, "not", ctx.context); }, }, { @@ -380,7 +385,10 @@ public: auto transformer_it = node_transformers.find({column.type->getTypeId(), function_node->getFunctionName()}); if (transformer_it != node_transformers.end()) - transformer_it->second(node, *function_node, *first_argument_column_node, getContext()); + { + ColumnContext ctx{std::move(column), first_argument_column_node->getColumnSource(), 
getContext()}; + transformer_it->second(node, *function_node, ctx); + } } }; From 1da258bfda0fcae33451efd670e8486910052c40 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Mon, 4 Mar 2024 15:35:03 +0000 Subject: [PATCH 015/140] better functions to subcolumns optimization --- .../Passes/FunctionToSubcolumnsPass.cpp | 96 ++++++----- .../RewriteAggregateFunctionWithIfPass.cpp | 2 +- ...functions_to_subcolumns_analyzer.reference | 149 +++++++++++++++++- ...01872_functions_to_subcolumns_analyzer.sql | 12 +- ...03_functions_to_subcolumns_final.reference | 25 +++ .../03003_functions_to_subcolumns_final.sql | 23 +++ 6 files changed, 242 insertions(+), 65 deletions(-) create mode 100644 tests/queries/0_stateless/03003_functions_to_subcolumns_final.reference create mode 100644 tests/queries/0_stateless/03003_functions_to_subcolumns_final.sql diff --git a/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp b/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp index 4ebcd59d8ec..8ba33a50ccf 100644 --- a/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp +++ b/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp @@ -58,8 +58,8 @@ void optimizeFunctionEmpty(QueryTreeNodePtr &, FunctionNode & function_node, Col function_arguments_nodes.push_back(std::make_shared(column, ctx.column_source)); function_arguments_nodes.push_back(std::make_shared(static_cast(0))); - auto function_name = positive ? "equals" : "notEquals"; - resolveOrdinaryFunctionNodeByName(function_node, function_name, std::move(ctx.context)); + const auto * function_name = positive ? "equals" : "notEquals"; + resolveOrdinaryFunctionNodeByName(function_node, function_name, ctx.context); } String getSubcolumnNameForElement(const Field & value, const DataTypeTuple & data_type_tuple) @@ -246,24 +246,11 @@ public: using Base = InDepthQueryTreeVisitorWithContext; using Base::Base; - struct Data - { - bool has_final = false; - std::unordered_set all_key_columns; - std::unordered_map indentifiers_count; - std::unordered_map optimized_identifiers_count; - }; - - Data getData() const { return data; } - void enterImpl(const QueryTreeNodePtr & node) { if (!getSettings().optimize_functions_to_subcolumns) return; - if (data.has_final) - return; - if (auto * table_node = node->as()) { enterImpl(*table_node); @@ -284,18 +271,45 @@ public: } } + std::unordered_set getIdentifiersToOptimize() const + { + /// Do not optimize if full column is requested in other context. + /// It doesn't make sense because it doesn't reduce amount of read data + /// and optimized functions are not computation heavy. But introducing + /// new identifier complicates query analysis and may break it. + /// + /// E.g. query: + /// SELECT n FROM table GROUP BY n HAVING isNotNull(n) + /// may be optimized to incorrect query: + /// SELECT n FROM table GROUP BY n HAVING not(n.null) + /// Will produce: `n.null` is not under aggregate function and not in GROUP BY keys) + /// + /// Do not optimize index columns (primary, min-max, secondary), + /// because otherwise analysis of indexes may be broken. + /// TODO: handle subcolumns in index analysis. 
+ + std::unordered_set identifiers_to_optimize; + for (const auto & [identifier, count] : optimized_identifiers_count) + { + if (all_key_columns.contains(identifier)) + continue; + + auto it = identifiers_count.find(identifier); + if (it != identifiers_count.end() && it->second == count) + identifiers_to_optimize.insert(identifier); + } + + return identifiers_to_optimize; + } + private: - Data data; + std::unordered_set all_key_columns; + std::unordered_map identifiers_count; + std::unordered_map optimized_identifiers_count; NameSet processed_tables; void enterImpl(const TableNode & table_node) { - if (table_node.hasTableExpressionModifiers() && table_node.getTableExpressionModifiers()->hasFinal()) - { - data.has_final = true; - return; - } - auto table_name = table_node.getStorage()->getStorageID().getFullTableName(); if (processed_tables.emplace(table_name).second) return; @@ -305,7 +319,7 @@ private: for (const auto & column_name : key_columns) { Identifier identifier({table_name, column_name}); - data.all_key_columns.insert(identifier); + all_key_columns.insert(identifier); } }; @@ -337,18 +351,23 @@ private: auto table_name = table_node->getStorage()->getStorageID().getFullTableName(); Identifier qualified_name({table_name, column_node.getColumnName()}); - ++data.indentifiers_count[qualified_name]; + ++identifiers_count[qualified_name]; } void enterImpl(const FunctionNode & function_node, const ColumnNode & first_argument_column_node, const TableNode & table_node) { + /// For queries with FINAL converting function to subcolumn may alter + /// special merging algorithms and produce wrong result of query. + if (table_node.hasTableExpressionModifiers() && table_node.getTableExpressionModifiers()->hasFinal()) + return; + auto column = first_argument_column_node.getColumn(); auto table_name = table_node.getStorage()->getStorageID().getFullTableName(); Identifier qualified_name({table_name, column.name}); if (node_transformers.contains({column.type->getTypeId(), function_node.getFunctionName()})) - ++data.optimized_identifiers_count[qualified_name]; + ++optimized_identifiers_count[qualified_name]; } }; @@ -398,32 +417,7 @@ void FunctionToSubcolumnsPass::run(QueryTreeNodePtr & query_tree_node, ContextPt { FunctionToSubcolumnsVisitorFirstPass first_visitor(context); first_visitor.visit(query_tree_node); - auto data = first_visitor.getData(); - - /// For queries with FINAL converting function to subcolumn may alter - /// special merging algorithms and produce wrong result of query. - if (data.has_final) - return; - - /// Do not optimize if full column is requested in other context. - /// It doesn't make sense because it doesn't reduce amount of read data - /// and optimized functions are not computation heavy. But introducing - /// new identifier complicates query analysis and may break it. - /// - /// E.g. query: - /// SELECT n FROM table GROUP BY n HAVING isNotNull(n) - /// may be optimized to incorrect query: - /// SELECT n FROM table GROUP BY n HAVING not(n.null) - /// Will produce: `n.null` is not under aggregate function and not in GROUP BY keys) - /// - /// Do not optimize index columns (primary, min-max, secondary), - /// because otherwise analysis of indexes may be broken. - /// TODO: handle subcolumns in index analysis. 
- - std::unordered_set identifiers_to_optimize; - for (const auto & [identifier, count] : data.optimized_identifiers_count) - if (!data.all_key_columns.contains(identifier) && data.indentifiers_count[identifier] == count) - identifiers_to_optimize.insert(identifier); + auto identifiers_to_optimize = first_visitor.getIdentifiersToOptimize(); if (identifiers_to_optimize.empty()) return; diff --git a/src/Analyzer/Passes/RewriteAggregateFunctionWithIfPass.cpp b/src/Analyzer/Passes/RewriteAggregateFunctionWithIfPass.cpp index a8041b5b0a9..c73ff524d1f 100644 --- a/src/Analyzer/Passes/RewriteAggregateFunctionWithIfPass.cpp +++ b/src/Analyzer/Passes/RewriteAggregateFunctionWithIfPass.cpp @@ -88,7 +88,7 @@ private: static inline void resolveAsAggregateFunctionWithIf(FunctionNode & function_node) { auto result_type = function_node.getResultType(); - auto suffix = result_type->isNullable() ? "OrNullIf" : "If"; + const auto * suffix = result_type->isNullable() ? "OrNullIf" : "If"; resolveAggregateFunctionNodeByName(function_node, function_node.getFunctionName() + suffix); } }; diff --git a/tests/queries/0_stateless/01872_functions_to_subcolumns_analyzer.reference b/tests/queries/0_stateless/01872_functions_to_subcolumns_analyzer.reference index ce5e46fa271..e409e9ad89f 100644 --- a/tests/queries/0_stateless/01872_functions_to_subcolumns_analyzer.reference +++ b/tests/queries/0_stateless/01872_functions_to_subcolumns_analyzer.reference @@ -1,5 +1,24 @@ 0 0 1 0 1 0 +QUERY id: 0 + PROJECTION COLUMNS + isNull(id) UInt8 + isNull(n) UInt8 + isNotNull(n) UInt8 + PROJECTION + LIST id: 1, nodes: 3 + FUNCTION id: 2, function_name: isNull, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 3, nodes: 1 + COLUMN id: 4, column_name: id, result_type: UInt64, source_id: 5 + COLUMN id: 6, column_name: n.null, result_type: UInt8, source_id: 5 + FUNCTION id: 7, function_name: not, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 8, nodes: 1 + COLUMN id: 9, column_name: n.null, result_type: UInt8, source_id: 5 + JOIN TREE + TABLE id: 5, alias: __table1, table_name: default.t_func_to_subcolumns + SELECT __table1.id IS NULL AS `isNull(id)`, __table1.`n.null` AS `isNull(n)`, @@ -7,6 +26,32 @@ SELECT FROM default.t_func_to_subcolumns AS __table1 3 0 1 0 0 1 0 \N +QUERY id: 0 + PROJECTION COLUMNS + length(arr) UInt64 + empty(arr) UInt8 + notEmpty(arr) UInt8 + empty(n) Nullable(UInt8) + PROJECTION + LIST id: 1, nodes: 4 + COLUMN id: 2, column_name: arr.size0, result_type: UInt64, source_id: 3 + FUNCTION id: 4, function_name: equals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 5, nodes: 2 + COLUMN id: 6, column_name: arr.size0, result_type: UInt64, source_id: 3 + CONSTANT id: 7, constant_value: UInt64_0, constant_value_type: UInt8 + FUNCTION id: 8, function_name: notEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 9, nodes: 2 + COLUMN id: 10, column_name: arr.size0, result_type: UInt64, source_id: 3 + CONSTANT id: 11, constant_value: UInt64_0, constant_value_type: UInt8 + FUNCTION id: 12, function_name: empty, function_type: ordinary, result_type: Nullable(UInt8) + ARGUMENTS + LIST id: 13, nodes: 1 + COLUMN id: 14, column_name: n, result_type: Nullable(String), source_id: 3 + JOIN TREE + TABLE id: 3, alias: __table1, table_name: default.t_func_to_subcolumns + SELECT __table1.`arr.size0` AS `length(arr)`, __table1.`arr.size0` = 0 AS `empty(arr)`, @@ -15,19 +60,106 @@ SELECT FROM default.t_func_to_subcolumns AS __table1 ['foo','bar'] [1,2] [] [] 
+QUERY id: 0 + PROJECTION COLUMNS + mapKeys(m) Array(String) + mapValues(m) Array(UInt64) + PROJECTION + LIST id: 1, nodes: 2 + COLUMN id: 2, column_name: m.keys, result_type: Array(String), source_id: 3 + COLUMN id: 4, column_name: m.values, result_type: Array(UInt64), source_id: 3 + JOIN TREE + TABLE id: 3, alias: __table1, table_name: default.t_func_to_subcolumns + SELECT __table1.`m.keys` AS `mapKeys(m)`, __table1.`m.values` AS `mapValues(m)` FROM default.t_func_to_subcolumns AS __table1 1 +QUERY id: 0 + PROJECTION COLUMNS + count(n) UInt64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 3, nodes: 1 + FUNCTION id: 4, function_name: not, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 5, nodes: 1 + COLUMN id: 6, column_name: n.null, result_type: UInt8, source_id: 7 + JOIN TREE + TABLE id: 7, alias: __table1, table_name: default.t_func_to_subcolumns + SELECT sum(NOT __table1.`n.null`) AS `count(n)` FROM default.t_func_to_subcolumns AS __table1 2 +QUERY id: 0 + PROJECTION COLUMNS + count(id) UInt64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 3, nodes: 1 + COLUMN id: 4, column_name: id, result_type: UInt64, source_id: 5 + JOIN TREE + TABLE id: 5, alias: __table1, table_name: default.t_func_to_subcolumns + SELECT count(__table1.id) AS `count(id)` FROM default.t_func_to_subcolumns AS __table1 1 0 0 2 1 0 3 0 0 +QUERY id: 0 + PROJECTION COLUMNS + id UInt64 + isNull(n) UInt8 + isNull(right.n) UInt8 + PROJECTION + LIST id: 1, nodes: 3 + COLUMN id: 2, column_name: id, result_type: UInt64, source_id: 3 + COLUMN id: 4, column_name: n.null, result_type: UInt8, source_id: 3 + FUNCTION id: 5, function_name: isNull, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 6, nodes: 1 + COLUMN id: 7, column_name: n, result_type: String, source_id: 8 + JOIN TREE + JOIN id: 9, strictness: ALL, kind: FULL + LEFT TABLE EXPRESSION + TABLE id: 3, alias: __table1, table_name: default.t_func_to_subcolumns + RIGHT TABLE EXPRESSION + UNION id: 8, alias: __table2, is_subquery: 1, union_mode: UNION_ALL + QUERIES + LIST id: 10, nodes: 2 + QUERY id: 11, alias: __table3 + PROJECTION COLUMNS + id UInt8 + n String + PROJECTION + LIST id: 12, nodes: 2 + CONSTANT id: 13, constant_value: UInt64_1, constant_value_type: UInt8 + CONSTANT id: 14, constant_value: \'qqq\', constant_value_type: String + JOIN TREE + TABLE id: 15, alias: __table4, table_name: system.one + QUERY id: 16, alias: __table5 + PROJECTION COLUMNS + id UInt8 + \'www\' String + PROJECTION + LIST id: 17, nodes: 2 + CONSTANT id: 18, constant_value: UInt64_3, constant_value_type: UInt8 + CONSTANT id: 19, constant_value: \'www\', constant_value_type: String + JOIN TREE + TABLE id: 20, alias: __table6, table_name: system.one + JOIN EXPRESSION + LIST id: 21, nodes: 1 + COLUMN id: 22, column_name: id, result_type: UInt64, source_id: 9 + EXPRESSION + LIST id: 23, nodes: 2 + COLUMN id: 24, column_name: id, result_type: UInt64, source_id: 3 + COLUMN id: 25, column_name: id, result_type: UInt8, source_id: 8 + SELECT __table1.id AS id, __table1.`n.null` AS `isNull(n)`, @@ -35,16 +167,19 @@ SELECT FROM default.t_func_to_subcolumns AS __table1 ALL FULL OUTER JOIN ( - + ( SELECT - 1 AS id, - \'qqq\' AS n - FROM system.one AS __table4 + 1 AS id, + \'qqq\' AS n + FROM system.one AS __table4 + ) UNION ALL + ( SELECT - 3 AS id, - \'www\' AS `\'www\'` - 
FROM system.one AS __table6 + 3 AS id, + \'www\' AS `\'www\'` + FROM system.one AS __table6 + ) ) AS __table2 USING (id) 0 10 0 20 diff --git a/tests/queries/0_stateless/01872_functions_to_subcolumns_analyzer.sql b/tests/queries/0_stateless/01872_functions_to_subcolumns_analyzer.sql index c1ab6909e2f..b544f6829cf 100644 --- a/tests/queries/0_stateless/01872_functions_to_subcolumns_analyzer.sql +++ b/tests/queries/0_stateless/01872_functions_to_subcolumns_analyzer.sql @@ -9,24 +9,24 @@ ENGINE = MergeTree ORDER BY tuple(); INSERT INTO t_func_to_subcolumns VALUES (1, [1, 2, 3], 'abc', map('foo', 1, 'bar', 2)) (2, [], NULL, map()); SELECT id IS NULL, n IS NULL, n IS NOT NULL FROM t_func_to_subcolumns; -EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT id IS NULL, n IS NULL, n IS NOT NULL FROM t_func_to_subcolumns; +EXPLAIN QUERY TREE dump_tree = 1, dump_ast = 1 SELECT id IS NULL, n IS NULL, n IS NOT NULL FROM t_func_to_subcolumns; SELECT length(arr), empty(arr), notEmpty(arr), empty(n) FROM t_func_to_subcolumns; -EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT length(arr), empty(arr), notEmpty(arr), empty(n) FROM t_func_to_subcolumns; +EXPLAIN QUERY TREE dump_tree = 1, dump_ast = 1 SELECT length(arr), empty(arr), notEmpty(arr), empty(n) FROM t_func_to_subcolumns; SELECT mapKeys(m), mapValues(m) FROM t_func_to_subcolumns; -EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT mapKeys(m), mapValues(m) FROM t_func_to_subcolumns; +EXPLAIN QUERY TREE dump_tree = 1, dump_ast = 1 SELECT mapKeys(m), mapValues(m) FROM t_func_to_subcolumns; SELECT count(n) FROM t_func_to_subcolumns; -EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT count(n) FROM t_func_to_subcolumns; +EXPLAIN QUERY TREE dump_tree = 1, dump_ast = 1 SELECT count(n) FROM t_func_to_subcolumns; SELECT count(id) FROM t_func_to_subcolumns; -EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT count(id) FROM t_func_to_subcolumns; +EXPLAIN QUERY TREE dump_tree = 1, dump_ast = 1 SELECT count(id) FROM t_func_to_subcolumns; SELECT id, left.n IS NULL, right.n IS NULL FROM t_func_to_subcolumns AS left FULL JOIN (SELECT 1 AS id, 'qqq' AS n UNION ALL SELECT 3 AS id, 'www') AS right USING(id); -EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT id, left.n IS NULL, right.n IS NULL FROM t_func_to_subcolumns AS left +EXPLAIN QUERY TREE dump_tree = 1, dump_ast = 1 SELECT id, left.n IS NULL, right.n IS NULL FROM t_func_to_subcolumns AS left FULL JOIN (SELECT 1 AS id, 'qqq' AS n UNION ALL SELECT 3 AS id, 'www') AS right USING(id); DROP TABLE t_func_to_subcolumns; diff --git a/tests/queries/0_stateless/03003_functions_to_subcolumns_final.reference b/tests/queries/0_stateless/03003_functions_to_subcolumns_final.reference new file mode 100644 index 00000000000..3051c199363 --- /dev/null +++ b/tests/queries/0_stateless/03003_functions_to_subcolumns_final.reference @@ -0,0 +1,25 @@ +3 +2 +SELECT __table1.`arr.size0` AS `length(arr)` +FROM default.t_length_1 AS __table1 +WHERE __table1.`arr.size0` IN ( + SELECT __table1.arr_length AS arr_length + FROM default.t_length_2 AS __table1 +) +2 +SELECT __table1.`arr.size0` AS `length(arr)` +FROM default.t_length_1 AS __table1 +WHERE __table1.`arr.size0` IN ( + SELECT __table1.arr_length AS arr_length + FROM default.t_length_2 AS __table1 + FINAL +) +2 +SELECT length(__table1.arr) AS `length(arr)` +FROM default.t_length_1 AS __table1 +FINAL +WHERE length(__table1.arr) IN ( + SELECT __table1.arr_length AS arr_length + FROM default.t_length_2 AS __table1 + FINAL +) diff --git 
a/tests/queries/0_stateless/03003_functions_to_subcolumns_final.sql b/tests/queries/0_stateless/03003_functions_to_subcolumns_final.sql new file mode 100644 index 00000000000..5975347ad09 --- /dev/null +++ b/tests/queries/0_stateless/03003_functions_to_subcolumns_final.sql @@ -0,0 +1,23 @@ +DROP TABLE IF EXISTS t_length_1; +DROP TABLE IF EXISTS t_length_2; + +SET allow_experimental_analyzer = 1; +SET optimize_on_insert = 0; + +CREATE TABLE t_length_1 (id UInt64, arr Array(UInt64)) ENGINE = ReplacingMergeTree ORDER BY id; +CREATE TABLE t_length_2 (id UInt64, arr_length UInt64) ENGINE = ReplacingMergeTree ORDER BY id; + +INSERT INTO t_length_1 VALUES (1, [1, 2, 3]), (2, [4, 5]); +INSERT INTO t_length_2 VALUES (1, 3), (1, 2), (2, 2); + +SELECT length(arr) FROM t_length_1 WHERE length(arr) in (SELECT arr_length FROM t_length_2); +EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT length(arr) FROM t_length_1 WHERE length(arr) in (SELECT arr_length FROM t_length_2); + +SELECT length(arr) FROM t_length_1 WHERE length(arr) in (SELECT arr_length FROM t_length_2 FINAL); +EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT length(arr) FROM t_length_1 WHERE length(arr) in (SELECT arr_length FROM t_length_2 FINAL); + +SELECT length(arr) FROM t_length_1 FINAL WHERE length(arr) in (SELECT arr_length FROM t_length_2 FINAL); +EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT length(arr) FROM t_length_1 FINAL WHERE length(arr) in (SELECT arr_length FROM t_length_2 FINAL); + +DROP TABLE t_length_1; +DROP TABLE t_length_2; From c0dd9b13aa09085a03b0e54b67ed4ff5c46f4336 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Mon, 4 Mar 2024 15:39:39 +0000 Subject: [PATCH 016/140] update docs --- docs/en/operations/settings/settings.md | 8 ++++---- docs/ru/operations/settings/settings.md | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 622644a1543..2273aa8c472 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -1776,7 +1776,7 @@ Default value: 0 (no restriction). ## insert_quorum {#insert_quorum} :::note -This setting is not applicable to SharedMergeTree, see [SharedMergeTree consistency](/docs/en/cloud/reference/shared-merge-tree/#consistency) for more information. +This setting is not applicable to SharedMergeTree, see [SharedMergeTree consistency](/docs/en/cloud/reference/shared-merge-tree/#consistency) for more information. ::: Enables the quorum writes. @@ -1819,7 +1819,7 @@ See also: ## insert_quorum_parallel {#insert_quorum_parallel} :::note -This setting is not applicable to SharedMergeTree, see [SharedMergeTree consistency](/docs/en/cloud/reference/shared-merge-tree/#consistency) for more information. +This setting is not applicable to SharedMergeTree, see [SharedMergeTree consistency](/docs/en/cloud/reference/shared-merge-tree/#consistency) for more information. ::: Enables or disables parallelism for quorum `INSERT` queries. If enabled, additional `INSERT` queries can be sent while previous queries have not yet finished. If disabled, additional writes to the same table will be rejected. 
@@ -1840,7 +1840,7 @@ See also: ## select_sequential_consistency {#select_sequential_consistency} :::note
-This setting differ in behavior between SharedMergeTree and ReplicatedMergeTree, see [SharedMergeTree consistency](/docs/en/cloud/reference/shared-merge-tree/#consistency) for more information about the behavior of `select_sequential_consistency` in SharedMergeTree. 
+This setting differs in behavior between SharedMergeTree and ReplicatedMergeTree, see [SharedMergeTree consistency](/docs/en/cloud/reference/shared-merge-tree/#consistency) for more information about the behavior of `select_sequential_consistency` in SharedMergeTree.
 ::: Enables or disables sequential consistency for `SELECT` queries. Requires `insert_quorum_parallel` to be disabled (enabled by default).
@@ -2504,7 +2504,7 @@ Possible values: - 0 — Optimization disabled. - 1 — Optimization enabled.
-Default value: `0`.
+Default value: `1`.
 ## optimize_trivial_count_query {#optimize-trivial-count-query}
diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md index a56afda641b..f4eecc615b2 100644 --- a/docs/ru/operations/settings/settings.md +++ b/docs/ru/operations/settings/settings.md
@@ -2077,7 +2077,7 @@ SELECT * FROM test_table - 0 — оптимизация отключена. - 1 — оптимизация включена.
-Значение по умолчанию: `0`.
+Значение по умолчанию: `1`.
 ## optimize_trivial_count_query {#optimize-trivial-count-query}
@@ -2798,7 +2798,7 @@ SELECT TOP 3 name, value FROM system.settings; ``` ### output_format_pretty_color {#output_format_pretty_color}
-Включает/выключает управляющие последовательности ANSI в форматах Pretty. 
+Включает/выключает управляющие последовательности ANSI в форматах Pretty.
 Возможные значения:
@@ -4123,7 +4123,7 @@ SELECT sum(number) FROM numbers(10000000000) SETTINGS partial_result_on_first_ca ## session_timezone {#session_timezone} Задаёт значение часового пояса (session_timezone) по умолчанию для текущей сессии вместо [часового пояса сервера](../server-configuration-parameters/settings.md#server_configuration_parameters-timezone). То есть, все значения DateTime/DateTime64, для которых явно не задан часовой пояс, будут интерпретированы как относящиеся к указанной зоне.
-При значении настройки `''` (пустая строка), будет совпадать с часовым поясом сервера. 
+При значении настройки `''` (пустая строка), будет совпадать с часовым поясом сервера.
 Функции `timeZone()` and `serverTimezone()` возвращают часовой пояс текущей сессии и сервера соответственно. 
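For context on the default change documented above: enabling `optimize_functions_to_subcolumns` replaces a whole-column function call with a read of a much smaller subcolumn, so only that subcolumn's files are fetched from disk. A toy sketch of the mapping involved (the helper below is illustrative only, not the actual analyzer pass; the entries follow the examples visible in these docs and tests):

    #include <map>
    #include <optional>
    #include <string>

    // Illustrative only: which subcolumn can stand in for a function call.
    std::optional<std::string> subcolumn_for(const std::string & function, const std::string & column)
    {
        static const std::map<std::string, std::string> suffix = {
            {"length", ".size0"},     // length(arr)  -> arr.size0
            {"empty", ".size0"},      // empty(arr)   -> arr.size0 = 0
            {"isNull", ".null"},      // n IS NULL    -> n.null
            {"mapKeys", ".keys"},     // mapKeys(m)   -> m.keys
            {"mapValues", ".values"}, // mapValues(m) -> m.values
        };
        auto it = suffix.find(function);
        if (it == suffix.end())
            return std::nullopt;
        return column + it->second;
    }

This is why the EXPLAIN outputs in the tests above print `__table1.`arr.size0`` instead of `length(__table1.arr)` once the setting is on.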
From ed3c36debefe6c8a59f8482c75ef9f70c27c9bc3 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Mon, 4 Mar 2024 19:59:22 +0000 Subject: [PATCH 017/140] fix tests --- src/Core/SettingsChangesHistory.h | 3 +++ .../0_stateless/02116_tuple_element_analyzer.reference | 4 ++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index face1def4b4..afb9b201f50 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -85,6 +85,9 @@ namespace SettingsChangesHistory /// It's used to implement `compatibility` setting (see https://github.com/ClickHouse/ClickHouse/issues/35972) static std::map settings_changes_history = { + {"24.3", { + {"optimize_functions_to_subcolumns", false, true, "Enable optimization by default"}, + }}, {"24.2", { {"allow_suspicious_variant_types", true, false, "Don't allow creating Variant type with suspicious variants by default"}, {"validate_experimental_and_suspicious_types_inside_nested_types", false, true, "Validate usage of experimental and suspicious types inside nested types"}, diff --git a/tests/queries/0_stateless/02116_tuple_element_analyzer.reference b/tests/queries/0_stateless/02116_tuple_element_analyzer.reference index d30f3a6cc58..22d48ffb2f3 100644 --- a/tests/queries/0_stateless/02116_tuple_element_analyzer.reference +++ b/tests/queries/0_stateless/02116_tuple_element_analyzer.reference @@ -15,8 +15,8 @@ SELECT __table1.`t2.1` AS `tupleElement(t2, 1)` FROM default.t_tuple_element AS __table1 1 2 SELECT - 1 AS `tupleElement(t, 1)`, - 2 AS `tupleElement(t, 2)` + _CAST(1, \'UInt8\') AS `tupleElement(t, 1)`, + _CAST(2, \'UInt8\') AS `tupleElement(t, 2)` FROM system.one AS __table1 1 2 SELECT From 7ac0ebbaca2c9032b04a6e77456e1c5b7f325f2d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Wed, 20 Mar 2024 16:11:12 +0100 Subject: [PATCH 018/140] Test jeaiii itoa --- base/base/itoa.cpp | 522 +++++++++++++++++++++------------------------ 1 file changed, 241 insertions(+), 281 deletions(-) diff --git a/base/base/itoa.cpp b/base/base/itoa.cpp index fd8fd8de025..4587d3e3e82 100644 --- a/base/base/itoa.cpp +++ b/base/base/itoa.cpp @@ -1,296 +1,256 @@ -// Based on https://github.com/amdn/itoa and combined with our optimizations -// -//=== itoa.cpp - Fast integer to ascii conversion --*- C++ -*-// -// -// The MIT License (MIT) -// Copyright (c) 2016 Arturo Martin-de-Nicolas -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included -// in all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -// SOFTWARE. 
-//===----------------------------------------------------------------------===// - -#include -#include -#include #include #include #include #include -namespace +namespace jeaiii { -template -ALWAYS_INLINE inline constexpr T pow10(size_t x) -{ - return x ? 10 * pow10(x - 1) : 1; -} +/* + MIT License -// Division by a power of 10 is implemented using a multiplicative inverse. -// This strength reduction is also done by optimizing compilers, but -// presently the fastest results are produced by using the values -// for the multiplication and the shift as given by the algorithm -// described by Agner Fog in "Optimizing Subroutines in Assembly Language" -// -// http://www.agner.org/optimize/optimizing_assembly.pdf -// -// "Integer division by a constant (all processors) -// A floating point number can be divided by a constant by multiplying -// with the reciprocal. If we want to do the same with integers, we have -// to scale the reciprocal by 2n and then shift the product to the right -// by n. There are various algorithms for finding a suitable value of n -// and compensating for rounding errors. The algorithm described below -// was invented by Terje Mathisen, Norway, and not published elsewhere." + Copyright (c) 2022 James Edward Anhalt III - https://github.com/jeaiii/itoa -/// Division by constant is performed by: -/// 1. Adding 1 if needed; -/// 2. Multiplying by another constant; -/// 3. Shifting right by another constant. -template -struct Division -{ - static constexpr bool add{add_}; - static constexpr UInt multiplier{multiplier_}; - static constexpr unsigned shift{shift_}; -}; + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: -/// Select a type with appropriate number of bytes from the list of types. -/// First parameter is the number of bytes requested. Then goes a list of types with 1, 2, 4, ... number of bytes. -/// Example: SelectType<4, uint8_t, uint16_t, uint32_t, uint64_t> will select uint32_t. -template -struct SelectType -{ - using Result = typename SelectType::Result; -}; + The above copyright notice and this permission notice shall be included in all + copies or substantial portions of the Software. -template -struct SelectType<1, T, Ts...> -{ - using Result = T; -}; + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE. +*/ + using u32 = decltype(0xffffffff); + using u64 = decltype(0xffffffffffffffff); - -/// Division by 10^N where N is the size of the type. 
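The strength-reduction idea quoted from Agner Fog above, and the fixed-point arithmetic in the jeaiii code that replaces this file, can both be checked with a small standalone program. A sketch; since the multiplier/shift template arguments of the deleted `Division` specializations were lost in formatting here, the 8-bit constants below are re-derived rather than copied from the old code:

    #include <cassert>
    #include <cstdint>

    int main()
    {
        // Division by 10 as multiplication by a scaled reciprocal:
        // 205 / 2^11 ~= 1/10, and the rounding error never crosses an
        // integer boundary for n in [0, 255], so the shift yields floor(n / 10).
        for (uint32_t n = 0; n <= 255; ++n)
            assert((n * 205u) >> 11 == n / 10);

        // The same idea drives jeaiii::to_text_from_integer below:
        // u32(10 * (1 << 24) / 1e3 + 1) == 167773, and multiplying by it and
        // shifting right by 24 peels off the two leading decimal digits of a
        // number with up to four digits.
        for (uint64_t n = 1; n <= 9999; ++n)
            assert((167773 * n) >> 24 == n / 100);
        return 0;
    }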
-template -using DivisionBy10PowN = typename SelectType< - N, - Division, /// divide by 10 - Division, /// divide by 100 - Division, /// divide by 10000 - Division /// divide by 100000000 - >::Result; - -template -using UnsignedOfSize = typename SelectType::Result; - -/// Holds the result of dividing an unsigned N-byte variable by 10^N resulting in -template -struct QuotientAndRemainder -{ - UnsignedOfSize quotient; // quotient with fewer than 2*N decimal digits - UnsignedOfSize remainder; // remainder with at most N decimal digits -}; - -template -QuotientAndRemainder inline split(UnsignedOfSize value) -{ - constexpr DivisionBy10PowN division; - - UnsignedOfSize quotient = (division.multiplier * (UnsignedOfSize<2 * N>(value) + division.add)) >> division.shift; - UnsignedOfSize remainder = static_cast>(value - quotient * pow10>(N)); - - return {quotient, remainder}; -} - -ALWAYS_INLINE inline char * outDigit(char * p, uint8_t value) -{ - *p = '0' + value; - ++p; - return p; -} - -// Using a lookup table to convert binary numbers from 0 to 99 -// into ascii characters as described by Andrei Alexandrescu in -// https://www.facebook.com/notes/facebook-engineering/three-optimization-tips-for-c/10151361643253920/ - -const char digits[201] = "00010203040506070809" - "10111213141516171819" - "20212223242526272829" - "30313233343536373839" - "40414243444546474849" - "50515253545556575859" - "60616263646566676869" - "70717273747576777879" - "80818283848586878889" - "90919293949596979899"; - -ALWAYS_INLINE inline char * outTwoDigits(char * p, uint8_t value) -{ - memcpy(p, &digits[value * 2], 2); - p += 2; - return p; -} - -namespace convert -{ -template -char * head(char * p, UInt u); -template -char * tail(char * p, UInt u); - -//===----------------------------------------------------------===// -// head: find most significant digit, skip leading zeros -//===----------------------------------------------------------===// - -// "x" contains quotient and remainder after division by 10^N -// quotient is less than 10^N -template -ALWAYS_INLINE inline char * head(char * p, QuotientAndRemainder x) -{ - p = head(p, UnsignedOfSize(x.quotient)); - p = tail(p, x.remainder); - return p; -} - -// "u" is less than 10^2*N -template -ALWAYS_INLINE inline char * head(char * p, UInt u) -{ - return u < pow10>(N) ? head(p, UnsignedOfSize(u)) : head(p, split(u)); -} - -// recursion base case, selected when "u" is one byte -template <> -ALWAYS_INLINE inline char * head, 1>(char * p, UnsignedOfSize<1> u) -{ - return u < 10 ? 
outDigit(p, u) : outTwoDigits(p, u); -} - -//===----------------------------------------------------------===// -// tail: produce all digits including leading zeros -//===----------------------------------------------------------===// - -// recursive step, "u" is less than 10^2*N -template -ALWAYS_INLINE inline char * tail(char * p, UInt u) -{ - QuotientAndRemainder x = split(u); - p = tail(p, UnsignedOfSize(x.quotient)); - p = tail(p, x.remainder); - return p; -} - -// recursion base case, selected when "u" is one byte -template <> -ALWAYS_INLINE inline char * tail, 1>(char * p, UnsignedOfSize<1> u) -{ - return outTwoDigits(p, u); -} - -//===----------------------------------------------------------===// -// large values are >= 10^2*N -// where x contains quotient and remainder after division by 10^N -//===----------------------------------------------------------===// -template -ALWAYS_INLINE inline char * large(char * p, QuotientAndRemainder x) -{ - QuotientAndRemainder y = split(x.quotient); - p = head(p, UnsignedOfSize(y.quotient)); - p = tail(p, y.remainder); - p = tail(p, x.remainder); - return p; -} - -//===----------------------------------------------------------===// -// handle values of "u" that might be >= 10^2*N -// where N is the size of "u" in bytes -//===----------------------------------------------------------===// -template -ALWAYS_INLINE inline char * uitoa(char * p, UInt u) -{ - if (u < pow10>(N)) - return head(p, UnsignedOfSize(u)); - QuotientAndRemainder x = split(u); - - return u < pow10>(2 * N) ? head(p, x) : large(p, x); -} - -// selected when "u" is one byte -template <> -ALWAYS_INLINE inline char * uitoa, 1>(char * p, UnsignedOfSize<1> u) -{ - if (u < 10) - return outDigit(p, u); - else if (u < 100) - return outTwoDigits(p, u); - else + struct pair { - p = outDigit(p, u / 100); - p = outTwoDigits(p, u % 100); - return p; + char dd[2]; + constexpr pair(char c) : dd{ c, '\0' } { } + constexpr pair(int n) : dd{ "0123456789"[n / 10], "0123456789"[n % 10] } { } + }; + + constexpr struct + { + pair dd[100] + { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, + 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, + 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, + 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, + 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, + 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, + 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, + 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, + 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, + }; + pair fd[100] + { + '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', + 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, + 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, + 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, + 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, + 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, + 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, + 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, + 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, + }; + } + digits; + + constexpr u64 mask24 = (u64(1) << 24) - 1; + constexpr u64 mask32 = (u64(1) << 32) - 1; + constexpr u64 mask57 = (u64(1) << 57) - 1; + + template struct _cond { using type = F; }; + template struct _cond { using type = T; }; + template using cond = typename _cond::type; + + template + inline ALWAYS_INLINE + char* to_text_from_integer(char* b, T i) + { + constexpr auto q = sizeof(T); + using U = cond>>; + + // convert bool to int before test with unary + to silence warning if T happens to be bool + U const n = +i < 0 ? 
*b++ = '-', U(0) - U(i) : U(i); + + if (n < u32(1e2)) + { + *reinterpret_cast(b) = digits.fd[n]; + return n < 10 ? b + 1 : b + 2; + } + if (n < u32(1e6)) + { + if (n < u32(1e4)) + { + auto f0 = u32(10 * (1 << 24) / 1e3 + 1) * n; + *reinterpret_cast(b) = digits.fd[f0 >> 24]; + b -= n < u32(1e3); + auto f2 = (f0 & mask24) * 100; + *reinterpret_cast(b + 2) = digits.dd[f2 >> 24]; + return b + 4; + } + auto f0 = u64(10 * (1ull << 32ull)/ 1e5 + 1) * n; + *reinterpret_cast(b) = digits.fd[f0 >> 32]; + b -= n < u32(1e5); + auto f2 = (f0 & mask32) * 100; + *reinterpret_cast(b + 2) = digits.dd[f2 >> 32]; + auto f4 = (f2 & mask32) * 100; + *reinterpret_cast(b + 4) = digits.dd[f4 >> 32]; + return b + 6; + } + if (n < u64(1ull << 32ull)) + { + if (n < u32(1e8)) + { + auto f0 = u64(10 * (1ull << 48ull) / 1e7 + 1) * n >> 16; + *reinterpret_cast(b) = digits.fd[f0 >> 32]; + b -= n < u32(1e7); + auto f2 = (f0 & mask32) * 100; + *reinterpret_cast(b + 2) = digits.dd[f2 >> 32]; + auto f4 = (f2 & mask32) * 100; + *reinterpret_cast(b + 4) = digits.dd[f4 >> 32]; + auto f6 = (f4 & mask32) * 100; + *reinterpret_cast(b + 6) = digits.dd[f6 >> 32]; + return b + 8; + } + auto f0 = u64(10 * (1ull << 57ull) / 1e9 + 1) * n; + *reinterpret_cast(b) = digits.fd[f0 >> 57]; + b -= n < u32(1e9); + auto f2 = (f0 & mask57) * 100; + *reinterpret_cast(b + 2) = digits.dd[f2 >> 57]; + auto f4 = (f2 & mask57) * 100; + *reinterpret_cast(b + 4) = digits.dd[f4 >> 57]; + auto f6 = (f4 & mask57) * 100; + *reinterpret_cast(b + 6) = digits.dd[f6 >> 57]; + auto f8 = (f6 & mask57) * 100; + *reinterpret_cast(b + 8) = digits.dd[f8 >> 57]; + return b + 10; + } + + // if we get here U must be u64 but some compilers don't know that, so reassign n to a u64 to avoid warnings + u32 z = n % u32(1e8); + u64 u = n / u32(1e8); + + if (u < u32(1e2)) + { + // u can't be 1 digit (if u < 10 it would have been handled above as a 9 digit 32bit number) + *reinterpret_cast(b) = digits.dd[u]; + b += 2; + } + else if (u < u32(1e6)) + { + if (u < u32(1e4)) + { + auto f0 = u32(10 * (1 << 24) / 1e3 + 1) * u; + *reinterpret_cast(b) = digits.fd[f0 >> 24]; + b -= u < u32(1e3); + auto f2 = (f0 & mask24) * 100; + *reinterpret_cast(b + 2) = digits.dd[f2 >> 24]; + b += 4; + } + else + { + auto f0 = u64(10 * (1ull << 32ull) / 1e5 + 1) * u; + *reinterpret_cast(b) = digits.fd[f0 >> 32]; + b -= u < u32(1e5); + auto f2 = (f0 & mask32) * 100; + *reinterpret_cast(b + 2) = digits.dd[f2 >> 32]; + auto f4 = (f2 & mask32) * 100; + *reinterpret_cast(b + 4) = digits.dd[f4 >> 32]; + b += 6; + } + } + else if (u < u32(1e8)) + { + auto f0 = u64(10 * (1ull << 48ull) / 1e7 + 1) * u >> 16; + *reinterpret_cast(b) = digits.fd[f0 >> 32]; + b -= u < u32(1e7); + auto f2 = (f0 & mask32) * 100; + *reinterpret_cast(b + 2) = digits.dd[f2 >> 32]; + auto f4 = (f2 & mask32) * 100; + *reinterpret_cast(b + 4) = digits.dd[f4 >> 32]; + auto f6 = (f4 & mask32) * 100; + *reinterpret_cast(b + 6) = digits.dd[f6 >> 32]; + b += 8; + } + else if (u < u64(1ull << 32ull)) + { + auto f0 = u64(10 * (1ull << 57ull) / 1e9 + 1) * u; + *reinterpret_cast(b) = digits.fd[f0 >> 57]; + b -= u < u32(1e9); + auto f2 = (f0 & mask57) * 100; + *reinterpret_cast(b + 2) = digits.dd[f2 >> 57]; + auto f4 = (f2 & mask57) * 100; + *reinterpret_cast(b + 4) = digits.dd[f4 >> 57]; + auto f6 = (f4 & mask57) * 100; + *reinterpret_cast(b + 6) = digits.dd[f6 >> 57]; + auto f8 = (f6 & mask57) * 100; + *reinterpret_cast(b + 8) = digits.dd[f8 >> 57]; + b += 10; + } + else + { + u32 y = u % u32(1e8); + u /= u32(1e8); + + // u is 2, 3, or 4 digits (if 
u < 10 it would have been handled above) + if (u < u32(1e2)) + { + *reinterpret_cast(b) = digits.dd[u]; + b += 2; + } + else + { + auto f0 = u32(10 * (1 << 24) / 1e3 + 1) * u; + *reinterpret_cast(b) = digits.fd[f0 >> 24]; + b -= u < u32(1e3); + auto f2 = (f0 & mask24) * 100; + *reinterpret_cast(b + 2) = digits.dd[f2 >> 24]; + b += 4; + } + // do 8 digits + auto f0 = (u64((1ull << 48ull) / 1e6 + 1) * y >> 16) + 1; + *reinterpret_cast(b) = digits.dd[f0 >> 32]; + auto f2 = (f0 & mask32) * 100; + *reinterpret_cast(b + 2) = digits.dd[f2 >> 32]; + auto f4 = (f2 & mask32) * 100; + *reinterpret_cast(b + 4) = digits.dd[f4 >> 32]; + auto f6 = (f4 & mask32) * 100; + *reinterpret_cast(b + 6) = digits.dd[f6 >> 32]; + b += 8; + } + // do 8 digits + auto f0 = (u64((1ull << 48ull) / 1e6 + 1) * z >> 16) + 1; + *reinterpret_cast(b) = digits.dd[f0 >> 32]; + auto f2 = (f0 & mask32) * 100; + *reinterpret_cast(b + 2) = digits.dd[f2 >> 32]; + auto f4 = (f2 & mask32) * 100; + *reinterpret_cast(b + 4) = digits.dd[f4 >> 32]; + auto f6 = (f4 & mask32) * 100; + *reinterpret_cast(b + 6) = digits.dd[f6 >> 32]; + return b + 8; } } -//===----------------------------------------------------------===// -// handle unsigned and signed integral operands -//===----------------------------------------------------------===// - -// itoa: handle unsigned integral operands (selected by SFINAE) -template && std::is_integral_v> * = nullptr> -ALWAYS_INLINE inline char * itoa(U u, char * p) +namespace { - return convert::uitoa(p, u); -} - -// itoa: handle signed integral operands (selected by SFINAE) -template && std::is_integral_v> * = nullptr> -ALWAYS_INLINE inline char * itoa(I i, char * p) +ALWAYS_INLINE inline void outTwoDigits(char * p, uint8_t value) { - // Need "mask" to be filled with a copy of the sign bit. - // If "i" is a negative value, then the result of "operator >>" - // is implementation-defined, though usually it is an arithmetic - // right shift that replicates the sign bit. - // Use a conditional expression to be portable, - // a good optimizing compiler generates an arithmetic right shift - // and avoids the conditional branch. - UnsignedOfSize mask = i < 0 ? ~UnsignedOfSize(0) : 0; - // Now get the absolute value of "i" and cast to unsigned type UnsignedOfSize. - // Cannot use std::abs() because the result is undefined - // in 2's complement systems for the most-negative value. - // Want to avoid conditional branch for performance reasons since - // CPU branch prediction will be ineffective when negative values - // occur randomly. - // Let "u" be "i" cast to unsigned type UnsignedOfSize. - // Subtract "u" from 2*u if "i" is positive or 0 if "i" is negative. - // This yields the absolute value with the desired type without - // using a conditional branch and without invoking undefined or - // implementation defined behavior: - UnsignedOfSize u = ((2 * UnsignedOfSize(i)) & ~mask) - UnsignedOfSize(i); - // Unconditionally store a minus sign when producing digits - // in a forward direction and increment the pointer only if - // the value is in fact negative. - // This avoids a conditional branch and is safe because we will - // always produce at least one digit and it will overwrite the - // minus sign when the value is not negative. 
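The deleted comment above describes a branchless absolute value; here is a self-contained illustration of the same trick, assuming two's-complement integers as that comment does:

    #include <cassert>
    #include <cstdint>

    // mask is all ones when i is negative, all zeros otherwise.
    static uint32_t branchless_abs(int32_t i)
    {
        uint32_t mask = i < 0 ? ~uint32_t(0) : 0;
        uint32_t u = static_cast<uint32_t>(i);
        // i >= 0: (2u & ~0) - u == u;  i < 0: (2u & 0) - u == -u (mod 2^32) == |i|.
        return ((2 * u) & ~mask) - u;
    }

    int main()
    {
        assert(branchless_abs(123) == 123);
        assert(branchless_abs(-123) == 123);
        assert(branchless_abs(INT32_MIN) == 2147483648u); // defined here, unlike std::abs
        return 0;
    }

The deleted code folds the sign handling in the same way: it unconditionally writes '-' and then advances the pointer by `mask & 1`, so no branch depends on the sign.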
- *p = '-'; - p += (mask & 1); - p = convert::uitoa(p, u); - return p; -} + *reinterpret_cast(p) = jeaiii::digits.fd[value]; } const uint64_t max_multiple_of_hundred_that_fits_in_64_bits = 1'00'00'00'00'00'00'00'00'00ull; @@ -301,7 +261,7 @@ ALWAYS_INLINE inline char * writeUIntText(UInt128 _x, char * p) { /// If we the highest 64bit item is empty, we can print just the lowest item as u64 if (_x.items[UInt128::_impl::little(1)] == 0) - return convert::itoa(_x.items[UInt128::_impl::little(0)], p); + return jeaiii::to_text_from_integer(p, _x.items[UInt128::_impl::little(0)]); /// Doing operations using __int128 is faster and we already rely on this feature using T = unsigned __int128; @@ -332,7 +292,7 @@ ALWAYS_INLINE inline char * writeUIntText(UInt128 _x, char * p) current_block += max_multiple_of_hundred_blocks; } - char * highest_part_print = convert::itoa(uint64_t(x), p); + char * highest_part_print = jeaiii::to_text_from_integer(p, uint64_t(x)); for (int i = 0; i < current_block; i++) { outTwoDigits(highest_part_print, two_values[current_block - 1 - i]); @@ -448,12 +408,12 @@ ALWAYS_INLINE inline char * writeSIntText(T x, char * pos) char * itoa(UInt8 i, char * p) { - return convert::itoa(uint8_t(i), p); + return jeaiii::to_text_from_integer(p, uint8_t(i)); } char * itoa(Int8 i, char * p) { - return convert::itoa(int8_t(i), p); + return jeaiii::to_text_from_integer(p, int8_t(i)); } char * itoa(UInt128 i, char * p) @@ -479,7 +439,7 @@ char * itoa(Int256 i, char * p) #define DEFAULT_ITOA(T) \ char * itoa(T i, char * p) \ { \ - return convert::itoa(i, p); \ + return jeaiii::to_text_from_integer(p, i); \ } #define FOR_MISSING_INTEGER_TYPES(M) \ From 225db5e253f578483bcbeeb8f3063c241382d49e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Wed, 20 Mar 2024 17:04:52 +0100 Subject: [PATCH 019/140] Style --- base/base/itoa.cpp | 386 +++++++++++++++++++++++---------------------- 1 file changed, 194 insertions(+), 192 deletions(-) diff --git a/base/base/itoa.cpp b/base/base/itoa.cpp index 4587d3e3e82..868fdedb176 100644 --- a/base/base/itoa.cpp +++ b/base/base/itoa.cpp @@ -28,229 +28,231 @@ namespace jeaiii OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ - using u32 = decltype(0xffffffff); - using u64 = decltype(0xffffffffffffffff); +struct pair +{ + char dd[2]; + constexpr pair(char c) : dd{c, '\0'} { } + constexpr pair(int n) : dd{"0123456789"[n / 10], "0123456789"[n % 10]} { } +}; - struct pair - { - char dd[2]; - constexpr pair(char c) : dd{ c, '\0' } { } - constexpr pair(int n) : dd{ "0123456789"[n / 10], "0123456789"[n % 10] } { } +constexpr struct +{ + pair dd[100]{ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, // + 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, // + 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, // + 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, // + 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, // + 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, // + 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, // + 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, // + 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, // + 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, // }; + pair fd[100]{ + '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', // + 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, // + 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, // + 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, // + 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, // + 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, // + 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, // + 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, // + 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, // + 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, // + }; +} digits; - constexpr struct +constexpr UInt64 mask24 = (UInt64(1) << 24) - 1; +constexpr UInt64 mask32 = (UInt64(1) << 32) - 1; +constexpr UInt64 mask57 = (UInt64(1) << 57) - 1; + +template +struct _cond +{ + using type = F; +}; +template +struct _cond +{ + using type = T; +}; +template +using cond = typename _cond::type; + +template +inline ALWAYS_INLINE char * to_text_from_integer(char * b, T i) +{ + constexpr auto q = sizeof(T); + using U = cond>>; + + // convert bool to int before test with unary + to silence warning if T happens to be bool + U const n = +i < 0 ? *b++ = '-', U(0) - U(i) : U(i); + + if (n < UInt32(1e2)) { - pair dd[100] - { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, - 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, - 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, - 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, - 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, - 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, - 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, - 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, - 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, - 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, - }; - pair fd[100] - { - '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', - 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, - 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, - 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, - 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, - 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, - 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, - 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, - 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, - 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, - }; + *reinterpret_cast(b) = digits.fd[n]; + return n < 10 ? b + 1 : b + 2; } - digits; - - constexpr u64 mask24 = (u64(1) << 24) - 1; - constexpr u64 mask32 = (u64(1) << 32) - 1; - constexpr u64 mask57 = (u64(1) << 57) - 1; - - template struct _cond { using type = F; }; - template struct _cond { using type = T; }; - template using cond = typename _cond::type; - - template - inline ALWAYS_INLINE - char* to_text_from_integer(char* b, T i) + if (n < UInt32(1e6)) { - constexpr auto q = sizeof(T); - using U = cond>>; - - // convert bool to int before test with unary + to silence warning if T happens to be bool - U const n = +i < 0 ? 
*b++ = '-', U(0) - U(i) : U(i); - - if (n < u32(1e2)) + if (n < UInt32(1e4)) { - *reinterpret_cast(b) = digits.fd[n]; - return n < 10 ? b + 1 : b + 2; + auto f0 = UInt32(10 * (1 << 24) / 1e3 + 1) * n; + *reinterpret_cast(b) = digits.fd[f0 >> 24]; + b -= n < UInt32(1e3); + auto f2 = (f0 & mask24) * 100; + *reinterpret_cast(b + 2) = digits.dd[f2 >> 24]; + return b + 4; } - if (n < u32(1e6)) + auto f0 = UInt64(10 * (1ull << 32ull) / 1e5 + 1) * n; + *reinterpret_cast(b) = digits.fd[f0 >> 32]; + b -= n < UInt32(1e5); + auto f2 = (f0 & mask32) * 100; + *reinterpret_cast(b + 2) = digits.dd[f2 >> 32]; + auto f4 = (f2 & mask32) * 100; + *reinterpret_cast(b + 4) = digits.dd[f4 >> 32]; + return b + 6; + } + if (n < UInt64(1ull << 32ull)) + { + if (n < UInt32(1e8)) { - if (n < u32(1e4)) - { - auto f0 = u32(10 * (1 << 24) / 1e3 + 1) * n; - *reinterpret_cast(b) = digits.fd[f0 >> 24]; - b -= n < u32(1e3); - auto f2 = (f0 & mask24) * 100; - *reinterpret_cast(b + 2) = digits.dd[f2 >> 24]; - return b + 4; - } - auto f0 = u64(10 * (1ull << 32ull)/ 1e5 + 1) * n; - *reinterpret_cast(b) = digits.fd[f0 >> 32]; - b -= n < u32(1e5); + auto f0 = UInt64(10 * (1ull << 48ull) / 1e7 + 1) * n >> 16; + *reinterpret_cast(b) = digits.fd[f0 >> 32]; + b -= n < UInt32(1e7); auto f2 = (f0 & mask32) * 100; - *reinterpret_cast(b + 2) = digits.dd[f2 >> 32]; + *reinterpret_cast(b + 2) = digits.dd[f2 >> 32]; auto f4 = (f2 & mask32) * 100; - *reinterpret_cast(b + 4) = digits.dd[f4 >> 32]; - return b + 6; - } - if (n < u64(1ull << 32ull)) - { - if (n < u32(1e8)) - { - auto f0 = u64(10 * (1ull << 48ull) / 1e7 + 1) * n >> 16; - *reinterpret_cast(b) = digits.fd[f0 >> 32]; - b -= n < u32(1e7); - auto f2 = (f0 & mask32) * 100; - *reinterpret_cast(b + 2) = digits.dd[f2 >> 32]; - auto f4 = (f2 & mask32) * 100; - *reinterpret_cast(b + 4) = digits.dd[f4 >> 32]; - auto f6 = (f4 & mask32) * 100; - *reinterpret_cast(b + 6) = digits.dd[f6 >> 32]; - return b + 8; - } - auto f0 = u64(10 * (1ull << 57ull) / 1e9 + 1) * n; - *reinterpret_cast(b) = digits.fd[f0 >> 57]; - b -= n < u32(1e9); - auto f2 = (f0 & mask57) * 100; - *reinterpret_cast(b + 2) = digits.dd[f2 >> 57]; - auto f4 = (f2 & mask57) * 100; - *reinterpret_cast(b + 4) = digits.dd[f4 >> 57]; - auto f6 = (f4 & mask57) * 100; - *reinterpret_cast(b + 6) = digits.dd[f6 >> 57]; - auto f8 = (f6 & mask57) * 100; - *reinterpret_cast(b + 8) = digits.dd[f8 >> 57]; - return b + 10; - } - - // if we get here U must be u64 but some compilers don't know that, so reassign n to a u64 to avoid warnings - u32 z = n % u32(1e8); - u64 u = n / u32(1e8); - - if (u < u32(1e2)) - { - // u can't be 1 digit (if u < 10 it would have been handled above as a 9 digit 32bit number) - *reinterpret_cast(b) = digits.dd[u]; - b += 2; - } - else if (u < u32(1e6)) - { - if (u < u32(1e4)) - { - auto f0 = u32(10 * (1 << 24) / 1e3 + 1) * u; - *reinterpret_cast(b) = digits.fd[f0 >> 24]; - b -= u < u32(1e3); - auto f2 = (f0 & mask24) * 100; - *reinterpret_cast(b + 2) = digits.dd[f2 >> 24]; - b += 4; - } - else - { - auto f0 = u64(10 * (1ull << 32ull) / 1e5 + 1) * u; - *reinterpret_cast(b) = digits.fd[f0 >> 32]; - b -= u < u32(1e5); - auto f2 = (f0 & mask32) * 100; - *reinterpret_cast(b + 2) = digits.dd[f2 >> 32]; - auto f4 = (f2 & mask32) * 100; - *reinterpret_cast(b + 4) = digits.dd[f4 >> 32]; - b += 6; - } - } - else if (u < u32(1e8)) - { - auto f0 = u64(10 * (1ull << 48ull) / 1e7 + 1) * u >> 16; - *reinterpret_cast(b) = digits.fd[f0 >> 32]; - b -= u < u32(1e7); - auto f2 = (f0 & mask32) * 100; - *reinterpret_cast(b + 2) = 
digits.dd[f2 >> 32]; - auto f4 = (f2 & mask32) * 100; - *reinterpret_cast(b + 4) = digits.dd[f4 >> 32]; + *reinterpret_cast(b + 4) = digits.dd[f4 >> 32]; auto f6 = (f4 & mask32) * 100; - *reinterpret_cast(b + 6) = digits.dd[f6 >> 32]; - b += 8; + *reinterpret_cast(b + 6) = digits.dd[f6 >> 32]; + return b + 8; } - else if (u < u64(1ull << 32ull)) + auto f0 = UInt64(10 * (1ull << 57ull) / 1e9 + 1) * n; + *reinterpret_cast(b) = digits.fd[f0 >> 57]; + b -= n < UInt32(1e9); + auto f2 = (f0 & mask57) * 100; + *reinterpret_cast(b + 2) = digits.dd[f2 >> 57]; + auto f4 = (f2 & mask57) * 100; + *reinterpret_cast(b + 4) = digits.dd[f4 >> 57]; + auto f6 = (f4 & mask57) * 100; + *reinterpret_cast(b + 6) = digits.dd[f6 >> 57]; + auto f8 = (f6 & mask57) * 100; + *reinterpret_cast(b + 8) = digits.dd[f8 >> 57]; + return b + 10; + } + + // if we get here U must be UInt64 but some compilers don't know that, so reassign n to a UInt64 to avoid warnings + UInt32 z = n % UInt32(1e8); + UInt64 u = n / UInt32(1e8); + + if (u < UInt32(1e2)) + { + // u can't be 1 digit (if u < 10 it would have been handled above as a 9 digit 32bit number) + *reinterpret_cast(b) = digits.dd[u]; + b += 2; + } + else if (u < UInt32(1e6)) + { + if (u < UInt32(1e4)) { - auto f0 = u64(10 * (1ull << 57ull) / 1e9 + 1) * u; - *reinterpret_cast(b) = digits.fd[f0 >> 57]; - b -= u < u32(1e9); - auto f2 = (f0 & mask57) * 100; - *reinterpret_cast(b + 2) = digits.dd[f2 >> 57]; - auto f4 = (f2 & mask57) * 100; - *reinterpret_cast(b + 4) = digits.dd[f4 >> 57]; - auto f6 = (f4 & mask57) * 100; - *reinterpret_cast(b + 6) = digits.dd[f6 >> 57]; - auto f8 = (f6 & mask57) * 100; - *reinterpret_cast(b + 8) = digits.dd[f8 >> 57]; - b += 10; + auto f0 = UInt32(10 * (1 << 24) / 1e3 + 1) * u; + *reinterpret_cast(b) = digits.fd[f0 >> 24]; + b -= u < UInt32(1e3); + auto f2 = (f0 & mask24) * 100; + *reinterpret_cast(b + 2) = digits.dd[f2 >> 24]; + b += 4; } else { - u32 y = u % u32(1e8); - u /= u32(1e8); - - // u is 2, 3, or 4 digits (if u < 10 it would have been handled above) - if (u < u32(1e2)) - { - *reinterpret_cast(b) = digits.dd[u]; - b += 2; - } - else - { - auto f0 = u32(10 * (1 << 24) / 1e3 + 1) * u; - *reinterpret_cast(b) = digits.fd[f0 >> 24]; - b -= u < u32(1e3); - auto f2 = (f0 & mask24) * 100; - *reinterpret_cast(b + 2) = digits.dd[f2 >> 24]; - b += 4; - } - // do 8 digits - auto f0 = (u64((1ull << 48ull) / 1e6 + 1) * y >> 16) + 1; - *reinterpret_cast(b) = digits.dd[f0 >> 32]; + auto f0 = UInt64(10 * (1ull << 32ull) / 1e5 + 1) * u; + *reinterpret_cast(b) = digits.fd[f0 >> 32]; + b -= u < UInt32(1e5); auto f2 = (f0 & mask32) * 100; - *reinterpret_cast(b + 2) = digits.dd[f2 >> 32]; + *reinterpret_cast(b + 2) = digits.dd[f2 >> 32]; auto f4 = (f2 & mask32) * 100; - *reinterpret_cast(b + 4) = digits.dd[f4 >> 32]; - auto f6 = (f4 & mask32) * 100; - *reinterpret_cast(b + 6) = digits.dd[f6 >> 32]; - b += 8; + *reinterpret_cast(b + 4) = digits.dd[f4 >> 32]; + b += 6; + } + } + else if (u < UInt32(1e8)) + { + auto f0 = UInt64(10 * (1ull << 48ull) / 1e7 + 1) * u >> 16; + *reinterpret_cast(b) = digits.fd[f0 >> 32]; + b -= u < UInt32(1e7); + auto f2 = (f0 & mask32) * 100; + *reinterpret_cast(b + 2) = digits.dd[f2 >> 32]; + auto f4 = (f2 & mask32) * 100; + *reinterpret_cast(b + 4) = digits.dd[f4 >> 32]; + auto f6 = (f4 & mask32) * 100; + *reinterpret_cast(b + 6) = digits.dd[f6 >> 32]; + b += 8; + } + else if (u < UInt64(1ull << 32ull)) + { + auto f0 = UInt64(10 * (1ull << 57ull) / 1e9 + 1) * u; + *reinterpret_cast(b) = digits.fd[f0 >> 57]; + b -= u < 
UInt32(1e9); + auto f2 = (f0 & mask57) * 100; + *reinterpret_cast(b + 2) = digits.dd[f2 >> 57]; + auto f4 = (f2 & mask57) * 100; + *reinterpret_cast(b + 4) = digits.dd[f4 >> 57]; + auto f6 = (f4 & mask57) * 100; + *reinterpret_cast(b + 6) = digits.dd[f6 >> 57]; + auto f8 = (f6 & mask57) * 100; + *reinterpret_cast(b + 8) = digits.dd[f8 >> 57]; + b += 10; + } + else + { + UInt32 y = u % UInt32(1e8); + u /= UInt32(1e8); + + // u is 2, 3, or 4 digits (if u < 10 it would have been handled above) + if (u < UInt32(1e2)) + { + *reinterpret_cast(b) = digits.dd[u]; + b += 2; + } + else + { + auto f0 = UInt32(10 * (1 << 24) / 1e3 + 1) * u; + *reinterpret_cast(b) = digits.fd[f0 >> 24]; + b -= u < UInt32(1e3); + auto f2 = (f0 & mask24) * 100; + *reinterpret_cast(b + 2) = digits.dd[f2 >> 24]; + b += 4; } // do 8 digits - auto f0 = (u64((1ull << 48ull) / 1e6 + 1) * z >> 16) + 1; - *reinterpret_cast(b) = digits.dd[f0 >> 32]; + auto f0 = (UInt64((1ull << 48ull) / 1e6 + 1) * y >> 16) + 1; + *reinterpret_cast(b) = digits.dd[f0 >> 32]; auto f2 = (f0 & mask32) * 100; - *reinterpret_cast(b + 2) = digits.dd[f2 >> 32]; + *reinterpret_cast(b + 2) = digits.dd[f2 >> 32]; auto f4 = (f2 & mask32) * 100; - *reinterpret_cast(b + 4) = digits.dd[f4 >> 32]; + *reinterpret_cast(b + 4) = digits.dd[f4 >> 32]; auto f6 = (f4 & mask32) * 100; - *reinterpret_cast(b + 6) = digits.dd[f6 >> 32]; - return b + 8; + *reinterpret_cast(b + 6) = digits.dd[f6 >> 32]; + b += 8; } + // do 8 digits + auto f0 = (UInt64((1ull << 48ull) / 1e6 + 1) * z >> 16) + 1; + *reinterpret_cast(b) = digits.dd[f0 >> 32]; + auto f2 = (f0 & mask32) * 100; + *reinterpret_cast(b + 2) = digits.dd[f2 >> 32]; + auto f4 = (f2 & mask32) * 100; + *reinterpret_cast(b + 4) = digits.dd[f4 >> 32]; + auto f6 = (f4 & mask32) * 100; + *reinterpret_cast(b + 6) = digits.dd[f6 >> 32]; + return b + 8; +} } namespace { ALWAYS_INLINE inline void outTwoDigits(char * p, uint8_t value) { - *reinterpret_cast(p) = jeaiii::digits.fd[value]; + *reinterpret_cast(p) = jeaiii::digits.fd[value]; } const uint64_t max_multiple_of_hundred_that_fits_in_64_bits = 1'00'00'00'00'00'00'00'00'00ull; From de76be248b4f2b667fcd390874af5b296e91686f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Wed, 20 Mar 2024 17:56:01 +0100 Subject: [PATCH 020/140] Revert incorrect change on my part --- base/base/itoa.cpp | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/base/base/itoa.cpp b/base/base/itoa.cpp index 868fdedb176..e7250764704 100644 --- a/base/base/itoa.cpp +++ b/base/base/itoa.cpp @@ -250,9 +250,24 @@ inline ALWAYS_INLINE char * to_text_from_integer(char * b, T i) namespace { -ALWAYS_INLINE inline void outTwoDigits(char * p, uint8_t value) +// Using a lookup table to convert binary numbers from 0 to 99 +// into ascii characters as described by Andrei Alexandrescu in +// https://www.facebook.com/notes/facebook-engineering/three-optimization-tips-for-c/10151361643253920/ +const char digits[201] = "00010203040506070809" + "10111213141516171819" + "20212223242526272829" + "30313233343536373839" + "40414243444546474849" + "50515253545556575859" + "60616263646566676869" + "70717273747576777879" + "80818283848586878889" + "90919293949596979899"; +ALWAYS_INLINE inline char * outTwoDigits(char * p, uint8_t value) { - *reinterpret_cast(p) = jeaiii::digits.fd[value]; + memcpy(p, &digits[value * 2], 2); + p += 2; + return p; } const uint64_t max_multiple_of_hundred_that_fits_in_64_bits = 1'00'00'00'00'00'00'00'00'00ull; From 
c4fcc5946831da07bb148a1299bd2c7099c221a7 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Thu, 21 Mar 2024 16:59:30 +0000 Subject: [PATCH 021/140] fix test --- tests/queries/0_stateless/02116_tuple_element.sql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/02116_tuple_element.sql b/tests/queries/0_stateless/02116_tuple_element.sql index ece7114e763..e3a5134f2b2 100644 --- a/tests/queries/0_stateless/02116_tuple_element.sql +++ b/tests/queries/0_stateless/02116_tuple_element.sql @@ -19,7 +19,7 @@ SELECT tupleElement(t1) FROM t_tuple_element; -- { serverError NUMBER_OF_ARGUMEN SELECT tupleElement(t1, 'b') FROM t_tuple_element; -- { serverError NOT_FOUND_COLUMN_IN_BLOCK, UNKNOWN_IDENTIFIER } SELECT tupleElement(t1, 0) FROM t_tuple_element; -- { serverError ARGUMENT_OUT_OF_BOUND, NOT_FOUND_COLUMN_IN_BLOCK } SELECT tupleElement(t1, 3) FROM t_tuple_element; -- { serverError ARGUMENT_OUT_OF_BOUND, NOT_FOUND_COLUMN_IN_BLOCK } -SELECT tupleElement(t1, materialize('a')) FROM t_tuple_element; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT tupleElement(t1, materialize('a')) FROM t_tuple_element; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT, NOT_FOUND_COLUMN_IN_BLOCK } SELECT t2.1 FROM t_tuple_element; EXPLAIN SYNTAX SELECT t2.1 FROM t_tuple_element; @@ -31,7 +31,7 @@ SELECT tupleElement(t2) FROM t_tuple_element; -- { serverError NUMBER_OF_ARGUMEN SELECT tupleElement(t2, 'a') FROM t_tuple_element; -- { serverError NOT_FOUND_COLUMN_IN_BLOCK, UNKNOWN_IDENTIFIER } SELECT tupleElement(t2, 0) FROM t_tuple_element; -- { serverError ARGUMENT_OUT_OF_BOUND, NOT_FOUND_COLUMN_IN_BLOCK } SELECT tupleElement(t2, 3) FROM t_tuple_element; -- { serverError ARGUMENT_OUT_OF_BOUND, NOT_FOUND_COLUMN_IN_BLOCK } -SELECT tupleElement(t2, materialize(1)) FROM t_tuple_element; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT tupleElement(t2, materialize(1)) FROM t_tuple_element; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT, NOT_FOUND_COLUMN_IN_BLOCK } DROP TABLE t_tuple_element; From 891277e01e99f21f4087c947a4fb3d448473562e Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Mon, 25 Mar 2024 14:57:43 +0000 Subject: [PATCH 022/140] fix clang-tidy --- src/Storages/IStorageCluster.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/IStorageCluster.h b/src/Storages/IStorageCluster.h index 36ded8d5412..d219eb32f45 100644 --- a/src/Storages/IStorageCluster.h +++ b/src/Storages/IStorageCluster.h @@ -37,7 +37,7 @@ public: QueryProcessingStage::Enum getQueryProcessingStage(ContextPtr, QueryProcessingStage::Enum, const StorageSnapshotPtr &, SelectQueryInfo &) const override; - bool isRemote() const override final { return true; } + bool isRemote() const final { return true; } bool supportsSubcolumns() const override { return true; } bool supportsOptimizationToSubcolumns() const override { return false; } From aa36b039c1fcec8881faee73083beb007d7b83a3 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Tue, 26 Mar 2024 15:02:25 +0000 Subject: [PATCH 023/140] fix test --- .../02971_functions_to_subcolumns_column_names.reference | 8 ++------ .../02971_functions_to_subcolumns_column_names.sql | 4 ++-- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/tests/queries/0_stateless/02971_functions_to_subcolumns_column_names.reference b/tests/queries/0_stateless/02971_functions_to_subcolumns_column_names.reference index 4787c660c68..03c16267db1 100644 --- a/tests/queries/0_stateless/02971_functions_to_subcolumns_column_names.reference +++ 
b/tests/queries/0_stateless/02971_functions_to_subcolumns_column_names.reference @@ -2,13 +2,9 @@ SELECT `arr.size0` AS `length(arr)`, `n.null` AS `isNull(n)` FROM t_column_names
-┌─length(arr)─┬─isNull(n)─┐
-│ 3 │ 0 │
-└─────────────┴───────────┘
+{"length(arr)":"3","isNull(n)":0}
 SELECT __table1.`arr.size0` AS `length(arr)`, __table1.`n.null` AS `isNull(n)` FROM default.t_column_names AS __table1
-┌─length(arr)─┬─isNull(n)─┐
-│ 3 │ 0 │
-└─────────────┴───────────┘
+{"length(arr)":"3","isNull(n)":0}
diff --git a/tests/queries/0_stateless/02971_functions_to_subcolumns_column_names.sql b/tests/queries/0_stateless/02971_functions_to_subcolumns_column_names.sql index 89c39046df3..b867148c8ca 100644 --- a/tests/queries/0_stateless/02971_functions_to_subcolumns_column_names.sql +++ b/tests/queries/0_stateless/02971_functions_to_subcolumns_column_names.sql @@ -8,12 +8,12 @@ SET optimize_functions_to_subcolumns = 1; SET allow_experimental_analyzer = 0; EXPLAIN SYNTAX SELECT length(arr), isNull(n) FROM t_column_names;
-SELECT length(arr), isNull(n) FROM t_column_names FORMAT PrettyCompactNoEscapes;
+SELECT length(arr), isNull(n) FROM t_column_names FORMAT JSONEachRow;
 SET optimize_functions_to_subcolumns = 1; SET allow_experimental_analyzer = 1; EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT length(arr), isNull(n) FROM t_column_names;
-SELECT length(arr), isNull(n) FROM t_column_names FORMAT PrettyCompactNoEscapes;
+SELECT length(arr), isNull(n) FROM t_column_names FORMAT JSONEachRow;
 DROP TABLE t_column_names;
From fb4a230eee1051cd17d627c370eb1ffe318f2dd5 Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 30 Apr 2024 10:44:25 +0200 Subject: [PATCH 024/140] Support reading partitioned DeltaLake columns --- src/Processors/Chunk.cpp | 4 +- .../DataLakes/DeltaLakeMetadataParser.cpp | 265 ++++++++++++++++-- .../DataLakes/DeltaLakeMetadataParser.h | 13 +- src/Storages/DataLakes/HudiMetadataParser.cpp | 19 +- src/Storages/DataLakes/HudiMetadataParser.h | 11 +- src/Storages/DataLakes/IStorageDataLake.h | 112 +++++--- .../DataLakes/Iceberg/StorageIceberg.cpp | 8 +- .../DataLakes/Iceberg/StorageIceberg.h | 4 +- src/Storages/DataLakes/PartitionColumns.h | 17 ++ src/Storages/DataLakes/S3MetadataReader.cpp | 7 +- src/Storages/StorageS3.cpp | 53 +++- src/Storages/StorageS3.h | 9 +- src/TableFunctions/ITableFunction.cpp | 2 +- src/TableFunctions/ITableFunctionDataLake.h | 15 +- src/TableFunctions/TableFunctionS3.cpp | 2 +- src/TableFunctions/TableFunctionS3Cluster.cpp | 2 +- tests/integration/test_storage_delta/test.py | 95 +++++++ 17 files changed, 534 insertions(+), 104 deletions(-) create mode 100644 src/Storages/DataLakes/PartitionColumns.h
diff --git a/src/Processors/Chunk.cpp b/src/Processors/Chunk.cpp index 2631f665f9c..8e3ca0b03b3 100644 --- a/src/Processors/Chunk.cpp +++ b/src/Processors/Chunk.cpp @@ -125,7 +125,7 @@ void Chunk::addColumn(size_t position, ColumnPtr column) if (position >= columns.size()) throw Exception(ErrorCodes::POSITION_OUT_OF_BOUND, "Position {} out of bound in Chunk::addColumn(), max position = {}",
- position, columns.size() - 1);
+ position, columns.size() ? columns.size() - 1 : 0);
 if (empty()) num_rows = column->size(); else if (column->size() != num_rows) @@ -143,7 +143,7 @@ void Chunk::erase(size_t position) if (position >= columns.size()) throw Exception(ErrorCodes::POSITION_OUT_OF_BOUND, "Position {} out of bound in Chunk::erase(), max position = {}",
- toString(position), toString(columns.size() - 1));
+ toString(position), toString(columns.size() ? 
columns.size() - 1 : 0)); columns.erase(columns.begin() + position); } diff --git a/src/Storages/DataLakes/DeltaLakeMetadataParser.cpp b/src/Storages/DataLakes/DeltaLakeMetadataParser.cpp index 14a912a180d..50b6ca83cb4 100644 --- a/src/Storages/DataLakes/DeltaLakeMetadataParser.cpp +++ b/src/Storages/DataLakes/DeltaLakeMetadataParser.cpp @@ -17,6 +17,22 @@ #include #include #include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include namespace fs = std::filesystem; @@ -26,6 +42,7 @@ namespace DB namespace ErrorCodes { extern const int INCORRECT_DATA; + extern const int LOGICAL_ERROR; extern const int BAD_ARGUMENTS; } @@ -65,9 +82,17 @@ struct DeltaLakeMetadataParser::Impl * An action changes one aspect of the table's state, for example, adding or removing a file. * Note: it is not a valid json, but a list of json's, so we read it in a while cycle. */ - std::set processMetadataFiles(const Configuration & configuration, ContextPtr context) + struct DeltaLakeMetadata + { + NamesAndTypesList schema; + Strings data_files; + DataLakePartitionColumns partition_columns; + }; + DeltaLakeMetadata processMetadataFiles(const Configuration & configuration, ContextPtr context) { std::set result_files; + NamesAndTypesList current_schema; + DataLakePartitionColumns current_partition_columns; const auto checkpoint_version = getCheckpointIfExists(result_files, configuration, context); if (checkpoint_version) @@ -81,7 +106,7 @@ struct DeltaLakeMetadataParser::Impl if (!MetadataReadHelper::exists(file_path, configuration)) break; - processMetadataFile(file_path, result_files, configuration, context); + processMetadataFile(file_path, result_files, current_schema, current_partition_columns, configuration, context); } LOG_TRACE( @@ -94,10 +119,10 @@ struct DeltaLakeMetadataParser::Impl configuration, deltalake_metadata_directory, metadata_file_suffix); for (const String & key : keys) - processMetadataFile(key, result_files, configuration, context); + processMetadataFile(key, result_files, current_schema, current_partition_columns, configuration, context); } - return result_files; + return DeltaLakeMetadata{current_schema, Strings(result_files.begin(), result_files.end()), current_partition_columns}; } /** @@ -132,6 +157,8 @@ struct DeltaLakeMetadataParser::Impl void processMetadataFile( const String & key, std::set & result, + NamesAndTypesList & file_schema, + DataLakePartitionColumns & file_partition_columns, const Configuration & configuration, ContextPtr context) { @@ -153,20 +180,217 @@ struct DeltaLakeMetadataParser::Impl if (json_str.empty()) continue; - const JSON json(json_str); - if (json.has("add")) + Poco::JSON::Parser parser; + Poco::Dynamic::Var json = parser.parse(json_str); + Poco::JSON::Object::Ptr object = json.extract(); + + if (object->has("add")) { - const auto path = json["add"]["path"].getString(); + auto add_object = object->get("add").extract(); + auto path = add_object->getValue("path"); result.insert(fs::path(configuration.getPath()) / path); + + auto filename = fs::path(path).filename().string(); + auto it = file_partition_columns.find(filename); + if (it == file_partition_columns.end()) + { + auto partition_values = add_object->get("partitionValues").extract(); + if (partition_values->size()) + { + auto & current_partition_columns = file_partition_columns[filename]; + for (const auto & name : partition_values->getNames()) + { + const auto value = 
partition_values->getValue(name); + auto name_and_type = file_schema.tryGetByName(name); + if (!name_and_type) + throw Exception(ErrorCodes::LOGICAL_ERROR, "No such column in schema: {}", name); + + auto field = getFieldValue(value, name_and_type->type); + current_partition_columns.emplace_back(*name_and_type, field); + + LOG_TEST(log, "Partition {} value is {} (for {})", name, value, filename); + } + } + } } - else if (json.has("remove")) + else if (object->has("remove")) { - const auto path = json["remove"]["path"].getString(); + auto path = object->get("remove").extract()->getValue("path"); result.erase(fs::path(configuration.getPath()) / path); } + if (file_schema.empty()) + { + // std::ostringstream oss; // STYLE_CHECK_ALLOW_STD_STRING_STREAM + // object->stringify(oss); + // LOG_TEST(log, "Metadata: {}", oss.str()); + + if (object->has("metaData")) + { + const auto metadata_object = object->get("metaData").extract(); + const auto schema_object = metadata_object->getValue("schemaString"); + + Poco::JSON::Parser p; + Poco::Dynamic::Var fields_json = parser.parse(schema_object); + Poco::JSON::Object::Ptr fields_object = fields_json.extract(); + + const auto fields = fields_object->get("fields").extract(); + for (size_t i = 0; i < fields->size(); ++i) + { + const auto field = fields->getObject(static_cast(i)); + auto name = field->getValue("name"); + auto type = field->getValue("type"); + auto is_nullable = field->getValue("nullable"); + + file_schema.push_back({name, getFieldType(field, "type", is_nullable)}); + } + } + } + /// TODO: Check if schema in each file is the same? } } + DataTypePtr getFieldType(const Poco::JSON::Object::Ptr & field, const String & type_key, bool is_nullable) + { + if (field->isObject(type_key)) + return getComplexTypeFromObject(field->getObject(type_key)); + + auto type = field->get(type_key); + if (type.isString()) + { + const String & type_name = type.extract(); + auto data_type = getSimpleTypeByName(type_name); + return is_nullable ? 
makeNullable(data_type) : data_type; + } + + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unexpected 'type' field: {}", type.toString()); + } + + Field getFieldValue(const String & value, DataTypePtr data_type) + { + DataTypePtr check_type; + if (data_type->isNullable()) + check_type = static_cast(data_type.get())->getNestedType(); + else + check_type = data_type; + + WhichDataType which(check_type->getTypeId()); + if (which.isStringOrFixedString()) + return value; + else if (which.isInt8()) + return parse(value); + else if (which.isInt16()) + return parse(value); + else if (which.isInt32()) + return parse(value); + else if (which.isInt64()) + return parse(value); + else if (which.isFloat32()) + return parse(value); + else if (which.isFloat64()) + return parse(value); + else if (which.isDate()) + return UInt16{LocalDate{std::string(value)}.getDayNum()}; + else if (which.isDate32()) + return Int32{LocalDate{std::string(value)}.getExtenedDayNum()}; + else if (which.isDateTime64()) + { + ReadBufferFromString in(value); + DateTime64 time = 0; + readDateTime64Text(time, 6, in, assert_cast(data_type.get())->getTimeZone()); + return time; + } + // else if (which.isDecimal32()) + // return parse(value); + // else if (which.isDecimal64()) + // return parse(value); + // else if (which.isDecimal128()) + // return parse(value); + // else if (which.isDecimal256()) + // return parse(value); + + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unsupported DeltaLake type for {}", check_type->getColumnType()); + } + + DataTypePtr getSimpleTypeByName(const String & type_name) + { + /// https://github.com/delta-io/delta/blob/master/PROTOCOL.md#primitive-types + + if (type_name == "string" || type_name == "binary") + return std::make_shared(); + if (type_name == "long") + return std::make_shared(); + if (type_name == "integer") + return std::make_shared(); + if (type_name == "short") + return std::make_shared(); + if (type_name == "byte") + return std::make_shared(); + if (type_name == "float") + return std::make_shared(); + if (type_name == "double") + return std::make_shared(); + if (type_name == "boolean") + return DataTypeFactory::instance().get("Bool"); + if (type_name == "date") + return std::make_shared(); + if (type_name == "timestamp") + return std::make_shared(6); + if (type_name.starts_with("decimal(") && type_name.ends_with(')')) + { + ReadBufferFromString buf(std::string_view(type_name.begin() + 8, type_name.end() - 1)); + size_t precision; + size_t scale; + readIntText(precision, buf); + skipWhitespaceIfAny(buf); + assertChar(',', buf); + skipWhitespaceIfAny(buf); + tryReadIntText(scale, buf); + return createDecimal(precision, scale); + } + + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unsupported DeltaLake type: {}", type_name); + } + + DataTypePtr getComplexTypeFromObject(const Poco::JSON::Object::Ptr & type) + { + String type_name = type->getValue("type"); + + if (type_name == "struct") + { + DataTypes element_types; + Names element_names; + auto fields = type->get("fields").extract(); + element_types.reserve(fields->size()); + element_names.reserve(fields->size()); + for (size_t i = 0; i != fields->size(); ++i) + { + auto field = fields->getObject(static_cast(i)); + element_names.push_back(field->getValue("name")); + auto required = field->getValue("required"); + element_types.push_back(getFieldType(field, "type", required)); + } + + return std::make_shared(element_types, element_names); + } + + if (type_name == "array") + { + bool is_nullable = type->getValue("containsNull"); + auto element_type 
= getFieldType(type, "elementType", is_nullable); + return std::make_shared(element_type); + } + + if (type_name == "map") + { + bool is_nullable = type->getValue("containsNull"); + auto key_type = getFieldType(type, "keyType", /* is_nullable */false); + auto value_type = getFieldType(type, "valueType", is_nullable); + return std::make_shared(key_type, value_type); + } + + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unsupported DeltaLake type: {}", type_name); + } + /** * Checkpoints in delta-lake are created each 10 commits by default. * Latest checkpoint is written in _last_checkpoint file: _delta_log/_last_checkpoint @@ -272,8 +496,8 @@ struct DeltaLakeMetadataParser::Impl arrow::default_memory_pool(), &reader)); - std::shared_ptr schema; - THROW_ARROW_NOT_OK(reader->GetSchema(&schema)); + std::shared_ptr file_schema; + THROW_ARROW_NOT_OK(reader->GetSchema(&file_schema)); ArrowColumnToCHColumn column_reader( header, "Parquet", @@ -318,20 +542,19 @@ struct DeltaLakeMetadataParser::Impl template -DeltaLakeMetadataParser::DeltaLakeMetadataParser() : impl(std::make_unique()) -{ -} - -template -Strings DeltaLakeMetadataParser::getFiles(const Configuration & configuration, ContextPtr context) +DeltaLakeMetadataParser::DeltaLakeMetadataParser(const Configuration & configuration, ContextPtr context) + : impl(std::make_unique()) { auto result = impl->processMetadataFiles(configuration, context); - return Strings(result.begin(), result.end()); + data_files = result.data_files; + schema = result.schema; + partition_columns = result.partition_columns; + + LOG_TRACE(impl->log, "Found {} data files, {} partition files, schema: {}", + data_files.size(), partition_columns.size(), schema.toString()); } -template DeltaLakeMetadataParser::DeltaLakeMetadataParser(); -template Strings DeltaLakeMetadataParser::getFiles( - const StorageS3::Configuration & configuration, ContextPtr); +template DeltaLakeMetadataParser::DeltaLakeMetadataParser(const StorageS3::Configuration & configuration, ContextPtr context); } #endif diff --git a/src/Storages/DataLakes/DeltaLakeMetadataParser.h b/src/Storages/DataLakes/DeltaLakeMetadataParser.h index df7276b90b4..58cf7acd2a3 100644 --- a/src/Storages/DataLakes/DeltaLakeMetadataParser.h +++ b/src/Storages/DataLakes/DeltaLakeMetadataParser.h @@ -1,7 +1,9 @@ #pragma once +#include #include #include +#include "PartitionColumns.h" namespace DB { @@ -10,13 +12,20 @@ template struct DeltaLakeMetadataParser { public: - DeltaLakeMetadataParser(); + DeltaLakeMetadataParser(const Configuration & configuration, ContextPtr context); - Strings getFiles(const Configuration & configuration, ContextPtr context); + Strings getFiles() { return data_files; } + + NamesAndTypesList getTableSchema() const { return schema; } + + DataLakePartitionColumns getPartitionColumns() const { return partition_columns; } private: struct Impl; std::shared_ptr impl; + NamesAndTypesList schema; + DataLakePartitionColumns partition_columns; + Strings data_files; }; } diff --git a/src/Storages/DataLakes/HudiMetadataParser.cpp b/src/Storages/DataLakes/HudiMetadataParser.cpp index 699dfe8fda0..ad66ba69258 100644 --- a/src/Storages/DataLakes/HudiMetadataParser.cpp +++ b/src/Storages/DataLakes/HudiMetadataParser.cpp @@ -61,7 +61,7 @@ struct HudiMetadataParser::Impl String key; UInt64 timestamp = 0; }; - std::unordered_map> data_files; + std::unordered_map> files; for (const auto & key : keys) { @@ -76,7 +76,7 @@ struct HudiMetadataParser::Impl const auto & file_id = file_parts[0]; const auto timestamp = 
parse<UInt64>(file_parts[2]);
 
-            auto & file_info = data_files[partition][file_id];
+            auto & file_info = files[partition][file_id];
             if (file_info.timestamp == 0 || file_info.timestamp < timestamp)
             {
                 file_info.key = std::move(key);
@@ -85,7 +85,7 @@
         }
 
         Strings result;
-        for (auto & [partition, partition_data] : data_files)
+        for (auto & [partition, partition_data] : files)
         {
             LOG_TRACE(log, "Adding {} data files from partition {}", partition, partition_data.size());
             for (auto & [file_id, file_data] : partition_data)
@@ -97,19 +97,12 @@
 
 
 template <typename Configuration, typename MetadataReadHelper>
-HudiMetadataParser<Configuration, MetadataReadHelper>::HudiMetadataParser() : impl(std::make_unique<Impl>())
+HudiMetadataParser<Configuration, MetadataReadHelper>::HudiMetadataParser(const Configuration & configuration, ContextPtr) : impl(std::make_unique<Impl>())
 {
+    data_files = impl->processMetadataFiles(configuration);
 }
 
-template <typename Configuration, typename MetadataReadHelper>
-Strings HudiMetadataParser<Configuration, MetadataReadHelper>::getFiles(const Configuration & configuration, ContextPtr)
-{
-    return impl->processMetadataFiles(configuration);
-}
-
-template HudiMetadataParser<StorageS3::Configuration, S3DataLakeMetadataReadHelper>::HudiMetadataParser();
-template Strings HudiMetadataParser<StorageS3::Configuration, S3DataLakeMetadataReadHelper>::getFiles(
-    const StorageS3::Configuration & configuration, ContextPtr);
+template HudiMetadataParser<StorageS3::Configuration, S3DataLakeMetadataReadHelper>::HudiMetadataParser(const StorageS3::Configuration & configuration, ContextPtr);
 
 }
 
diff --git a/src/Storages/DataLakes/HudiMetadataParser.h b/src/Storages/DataLakes/HudiMetadataParser.h
index 6727ba2f718..9e2901a9d24 100644
--- a/src/Storages/DataLakes/HudiMetadataParser.h
+++ b/src/Storages/DataLakes/HudiMetadataParser.h
@@ -1,7 +1,9 @@
 #pragma once
 
+#include <Interpreters/Context_fwd.h>
 #include
 #include
+#include "PartitionColumns.h"
 
 namespace DB
 {
@@ -10,13 +12,18 @@ template <typename Configuration, typename MetadataReadHelper>
 struct HudiMetadataParser
 {
 public:
-    HudiMetadataParser();
+    HudiMetadataParser(const Configuration & configuration, ContextPtr context);
 
-    Strings getFiles(const Configuration & configuration, ContextPtr context);
+    Strings getFiles() { return data_files; }
+
+    NamesAndTypesList getTableSchema() const { return {}; }
+
+    DataLakePartitionColumns getPartitionColumns() const { return {}; }
 
 private:
     struct Impl;
     std::shared_ptr<Impl> impl;
+    Strings data_files;
 };
 
 }
diff --git a/src/Storages/DataLakes/IStorageDataLake.h b/src/Storages/DataLakes/IStorageDataLake.h
index 711abbde38c..be23c017043 100644
--- a/src/Storages/DataLakes/IStorageDataLake.h
+++ b/src/Storages/DataLakes/IStorageDataLake.h
@@ -9,6 +9,7 @@
 #include
 #include
 #include
+#include "PartitionColumns.h"
 
 #include
 
@@ -23,15 +24,51 @@ public:
     using Configuration = typename Storage::Configuration;
 
     template <typename ...Args>
-    explicit IStorageDataLake(const Configuration & configuration_, ContextPtr context_, LoadingStrictnessLevel mode, Args && ...args)
-        : Storage(getConfigurationForDataRead(configuration_, context_, {}, mode), context_, std::forward<Args>(args)...)
-        , base_configuration(configuration_)
-        , log(getLogger(getName())) {} // NOLINT(clang-analyzer-optin.cplusplus.VirtualCall)
+    static StoragePtr create(
+        const Configuration & configuration_,
+        ContextPtr context_,
+        LoadingStrictnessLevel mode,
+        const ColumnsDescription & columns_,
+        Args && ...args)
+    {
+        std::unique_ptr<MetadataParser> metadata;
+        Configuration read_configuration;
+        Configuration base_configuration{configuration_};
+        try
+        {
+            base_configuration.update(context_);
+            metadata = std::make_unique<MetadataParser>(base_configuration, context_);
+            read_configuration = getConfigurationForDataRead(*metadata, base_configuration, context_);
+        }
+        catch (...)
+        {
+            if (mode <= LoadingStrictnessLevel::CREATE)
+                throw;
+            tryLogCurrentException(__PRETTY_FUNCTION__);
+        }
+
+        return std::make_shared<IStorageDataLake>(
+            configuration_,
+            read_configuration,
+            context_,
+            columns_.empty() && metadata ? ColumnsDescription(metadata->getTableSchema()) : columns_,
+            std::forward<Args>(args)...);
+    }
 
     template <typename ...Args>
-    static StoragePtr create(const Configuration & configuration_, ContextPtr context_, LoadingStrictnessLevel mode, Args && ...args)
+    explicit IStorageDataLake(
+        const Configuration & base_configuration_,
+        const Configuration & read_configuration_,
+        ContextPtr context_,
+        const ColumnsDescription & columns_,
+        Args && ...args)
+        : Storage(read_configuration_,
+                  context_,
+                  columns_,
+                  std::forward<Args>(args)...)
+        , base_configuration(base_configuration_)
+        , log(getLogger(getName())) // NOLINT(clang-analyzer-optin.cplusplus.VirtualCall)
     {
-        return std::make_shared<IStorageDataLake>(configuration_, context_, mode, std::forward<Args>(args)...);
     }
 
     String getName() const override { return name; }
@@ -41,8 +78,18 @@ public:
         const std::optional<FormatSettings> & format_settings,
         const ContextPtr & local_context)
     {
-        auto configuration = getConfigurationForDataRead(base_configuration, local_context);
-        return Storage::getTableStructureFromData(configuration, format_settings, local_context);
+        base_configuration.update(local_context);
+        auto metadata = std::make_unique<MetadataParser>(base_configuration, local_context);
+        auto schema = metadata->getTableSchema();
+        if (!schema.empty())
+        {
+            return ColumnsDescription(schema);
+        }
+        else
+        {
+            auto read_configuration = getConfigurationForDataRead(*metadata, base_configuration, local_context);
+            return Storage::getTableStructureFromData(read_configuration, format_settings, local_context);
+        }
     }
 
     static Configuration getConfiguration(ASTs & engine_args, const ContextPtr & local_context)
@@ -65,51 +112,31 @@ public:
 
 private:
     static Configuration getConfigurationForDataRead(
-        const Configuration & base_configuration, const ContextPtr & local_context, const Strings & keys = {},
-        LoadingStrictnessLevel mode = LoadingStrictnessLevel::CREATE)
+        MetadataParser & metadata_,
+        const Configuration & base_configuration,
+        const ContextPtr & local_context)
     {
         auto configuration{base_configuration};
         configuration.update(local_context);
         configuration.static_configuration = true;
-
-        try
-        {
-            if (keys.empty())
-                configuration.keys = getDataFiles(configuration, local_context);
-            else
-                configuration.keys = keys;
-
-            LOG_TRACE(
-                getLogger("DataLake"),
-                "New configuration path: {}, keys: {}",
-                configuration.getPath(), fmt::join(configuration.keys, ", "));
-
-            configuration.connect(local_context);
-            return configuration;
-        }
-        catch (...)
-        {
-            if (mode <= LoadingStrictnessLevel::CREATE)
-                throw;
-            tryLogCurrentException(__PRETTY_FUNCTION__);
-            return configuration;
-        }
-    }
-
-    static Strings getDataFiles(const Configuration & configuration, const ContextPtr & local_context)
-    {
-        return MetadataParser().getFiles(configuration, local_context);
+        configuration.keys = metadata_.getFiles();
+        configuration.connect(local_context);
+        return configuration;
     }
 
     void updateConfigurationImpl(const ContextPtr & local_context)
     {
         const bool updated = base_configuration.update(local_context);
-        auto new_keys = getDataFiles(base_configuration, local_context);
+
+        auto metadata = MetadataParser(base_configuration, local_context);
+        auto new_keys = metadata.getFiles();
+        Storage::partition_columns = metadata.getPartitionColumns();
 
         if (!updated && new_keys == Storage::getConfiguration().keys)
            return;
 
-        Storage::useConfiguration(getConfigurationForDataRead(base_configuration, local_context, new_keys));
+        auto read_configuration = getConfigurationForDataRead(metadata, base_configuration, local_context);
+        Storage::useConfiguration(read_configuration);
    }
 
     Configuration base_configuration;
@@ -127,8 +154,9 @@ static StoragePtr createDataLakeStorage(const StorageFactory::Arguments & args)
     if (configuration.format == "auto")
         configuration.format = "Parquet";
 
-    return DataLake::create(configuration, args.getContext(), args.mode, args.table_id, args.columns, args.constraints,
-                            args.comment, getFormatSettings(args.getContext()));
+    return DataLake::create(configuration, args.getContext(), args.mode,
+                            args.columns, args.table_id, args.constraints,
+                            args.comment, getFormatSettings(args.getContext()));
 }
 
 }
diff --git a/src/Storages/DataLakes/Iceberg/StorageIceberg.cpp b/src/Storages/DataLakes/Iceberg/StorageIceberg.cpp
index 19cd97c3d4f..6557b67d20c 100644
--- a/src/Storages/DataLakes/Iceberg/StorageIceberg.cpp
+++ b/src/Storages/DataLakes/Iceberg/StorageIceberg.cpp
@@ -9,8 +9,8 @@ StoragePtr StorageIceberg::create(
     const DB::StorageIceberg::Configuration & base_configuration,
     DB::ContextPtr context_,
     LoadingStrictnessLevel mode,
-    const DB::StorageID & table_id_,
     const DB::ColumnsDescription & columns_,
+    const DB::StorageID & table_id_,
     const DB::ConstraintsDescription & constraints_,
     const String & comment,
     std::optional<FormatSettings> format_settings_)
@@ -36,8 +36,8 @@ StoragePtr StorageIceberg::create(
         std::move(metadata),
         configuration,
         context_,
-        table_id_,
         columns_.empty() ?
ColumnsDescription(schema_from_metadata) : columns_,
+        table_id_,
         constraints_,
         comment,
         format_settings_);
@@ -47,12 +47,12 @@ StorageIceberg::StorageIceberg(
     std::unique_ptr<IcebergMetadata> metadata_,
     const Configuration & configuration_,
     ContextPtr context_,
-    const StorageID & table_id_,
     const ColumnsDescription & columns_,
+    const StorageID & table_id_,
     const ConstraintsDescription & constraints_,
     const String & comment,
     std::optional<FormatSettings> format_settings_)
-    : StorageS3(configuration_, context_, table_id_, columns_, constraints_, comment, format_settings_)
+    : StorageS3(configuration_, context_, columns_, table_id_, constraints_, comment, format_settings_)
     , current_metadata(std::move(metadata_))
     , base_configuration(configuration_)
 {
diff --git a/src/Storages/DataLakes/Iceberg/StorageIceberg.h b/src/Storages/DataLakes/Iceberg/StorageIceberg.h
index 45cbef0b41b..a95e203aa72 100644
--- a/src/Storages/DataLakes/Iceberg/StorageIceberg.h
+++ b/src/Storages/DataLakes/Iceberg/StorageIceberg.h
@@ -31,8 +31,8 @@ public:
     static StoragePtr create(const Configuration & base_configuration,
                              ContextPtr context_,
                              LoadingStrictnessLevel mode,
-                             const StorageID & table_id_,
                              const ColumnsDescription & columns_,
+                             const StorageID & table_id_,
                              const ConstraintsDescription & constraints_,
                              const String & comment,
                              std::optional<FormatSettings> format_settings_);
@@ -41,8 +41,8 @@ public:
         std::unique_ptr<IcebergMetadata> metadata_,
         const Configuration & configuration_,
         ContextPtr context_,
-        const StorageID & table_id_,
         const ColumnsDescription & columns_,
+        const StorageID & table_id_,
         const ConstraintsDescription & constraints_,
         const String & comment,
         std::optional<FormatSettings> format_settings_);
diff --git a/src/Storages/DataLakes/PartitionColumns.h b/src/Storages/DataLakes/PartitionColumns.h
new file mode 100644
index 00000000000..604dbbf78fa
--- /dev/null
+++ b/src/Storages/DataLakes/PartitionColumns.h
@@ -0,0 +1,17 @@
+#pragma once
+#include <Core/NamesAndTypes.h>
+#include <Core/Field.h>
+
+namespace DB
+{
+
+struct DataLakePartitionColumn
+{
+    NameAndTypePair name_and_type;
+    Field value;
+};
+
+/// Data file -> partition columns
+using DataLakePartitionColumns = std::unordered_map<std::string, std::vector<DataLakePartitionColumn>>;
+
+}
diff --git a/src/Storages/DataLakes/S3MetadataReader.cpp b/src/Storages/DataLakes/S3MetadataReader.cpp
index d66e21550a3..6a00ae1e452 100644
--- a/src/Storages/DataLakes/S3MetadataReader.cpp
+++ b/src/Storages/DataLakes/S3MetadataReader.cpp
@@ -45,12 +45,16 @@ std::vector<String> S3DataLakeMetadataReadHelper::listFiles(
     const auto & bucket = base_configuration.url.bucket;
     const auto & client = base_configuration.client;
 
+    auto path = std::filesystem::path(table_path) / prefix;
+
     std::vector<String> res;
     S3::ListObjectsV2Request request;
     Aws::S3::Model::ListObjectsV2Outcome outcome;
 
     request.SetBucket(bucket);
-    request.SetPrefix(std::filesystem::path(table_path) / prefix);
+    request.SetPrefix(path);
+
+    LOG_TEST(getLogger("S3DataLakeMetadataReadHelper"), "Listing files in {}", path.string());
 
     bool is_finished{false};
     while (!is_finished)
@@ -69,6 +73,7 @@ std::vector<String> S3DataLakeMetadataReadHelper::listFiles(
         for (const auto & obj : result_batch)
         {
             const auto & filename = obj.GetKey();
+            LOG_TEST(getLogger("S3DataLakeMetadataReadHelper"), "Listed file: {} (searching for suffix: {})", filename, suffix);
             if (filename.ends_with(suffix))
                 res.push_back(filename);
         }
diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp
index 0371a9de08a..205b5d3381e 100644
--- a/src/Storages/StorageS3.cpp
+++ b/src/Storages/StorageS3.cpp
@@ -148,20 +148,23 @@ public:
         const StorageSnapshotPtr & storage_snapshot_,
         const ContextPtr & context_,
Block sample_block,
+        const StorageS3::Configuration & query_configuration_,
         StorageS3 & storage_,
         ReadFromFormatInfo read_from_format_info_,
         bool need_only_count_,
         size_t max_block_size_,
-        size_t num_streams_)
+        size_t num_streams_,
+        const DataLakePartitionColumns & partition_columns_)
         : SourceStepWithFilter(DataStream{.header = std::move(sample_block)}, column_names_, query_info_, storage_snapshot_, context_)
         , column_names(column_names_)
         , storage(storage_)
         , read_from_format_info(std::move(read_from_format_info_))
         , need_only_count(need_only_count_)
+        , query_configuration(query_configuration_)
+        , partition_columns(partition_columns_)
         , max_block_size(max_block_size_)
         , num_streams(num_streams_)
     {
-        query_configuration = storage.updateConfigurationAndGetCopy(context);
         virtual_columns = storage.getVirtualsList();
     }
 
@@ -172,6 +175,7 @@ private:
     bool need_only_count;
     StorageS3::Configuration query_configuration;
     NamesAndTypesList virtual_columns;
+    DataLakePartitionColumns partition_columns;
     size_t max_block_size;
     size_t num_streams;
 
@@ -577,7 +581,8 @@ StorageS3Source::StorageS3Source(
     const String & url_host_and_port_,
     std::shared_ptr<IIterator> file_iterator_,
     const size_t max_parsing_threads_,
-    bool need_only_count_)
+    bool need_only_count_,
+    const DataLakePartitionColumns & partition_columns_)
     : SourceWithKeyCondition(info.source_header, false)
     , WithContext(context_)
     , name(std::move(name_))
@@ -593,6 +598,7 @@ StorageS3Source::StorageS3Source(
     , client(client_)
     , sample_block(info.format_header)
     , format_settings(format_settings_)
+    , partition_columns(partition_columns_)
     , requested_virtual_columns(info.requested_virtual_columns)
     , file_iterator(file_iterator_)
     , max_parsing_threads(max_parsing_threads_)
@@ -798,8 +804,37 @@ Chunk StorageS3Source::generate()
             size_t chunk_size = 0;
             if (const auto * input_format = reader.getInputFormat())
                 chunk_size = reader.getInputFormat()->getApproxBytesReadForChunk();
+
             progress(num_rows, chunk_size ? chunk_size : chunk.bytes());
             VirtualColumnUtils::addRequestedPathFileAndSizeVirtualsToChunk(chunk, requested_virtual_columns, reader.getPath(), reader.getFileSize());
+
+            if (!partition_columns.empty() && chunk_size && chunk.hasColumns())
+            {
+                auto filename = fs::path(reader.getPath()).filename().string();
+                auto partition_values = partition_columns.find(filename);
+
+                for (const auto & [name_and_type, value] : partition_values->second)
+                {
+                    if (!sample_block.has(name_and_type.name))
+                        continue;
+
+                    auto column_pos = sample_block.getPositionByName(name_and_type.name);
+
+                    const auto & type = name_and_type.type;
+                    auto partition_column = type->createColumnConst(chunk.getNumRows(), value)->convertToFullColumnIfConst();
+                    /// This column is filled with default value now, remove it.
+                    chunk.erase(column_pos);
+                    /// Add correct values.
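+                    /// If other columns remain in the chunk, re-insert the materialized
+                    /// partition column at the position of the removed default-filled one;
+                    /// otherwise it becomes the only column of the chunk.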
+                    if (chunk.hasColumns())
+                    {
+                        chunk.addColumn(column_pos, std::move(partition_column));
+                    }
+                    else
+                    {
+                        chunk.addColumn(std::move(partition_column));
+                    }
+                }
+            }
 
             return chunk;
         }
@@ -1072,8 +1107,8 @@ private:
 StorageS3::StorageS3(
     const Configuration & configuration_,
     const ContextPtr & context_,
-    const StorageID & table_id_,
     const ColumnsDescription & columns_,
+    const StorageID & table_id_,
     const ConstraintsDescription & constraints_,
     const String & comment,
     std::optional<FormatSettings> format_settings_,
@@ -1190,17 +1225,20 @@ void StorageS3::read(
     bool need_only_count = (query_info.optimize_trivial_count || read_from_format_info.requested_columns.empty())
         && local_context->getSettingsRef().optimize_count_from_files;
 
+    auto query_configuration = updateConfigurationAndGetCopy(local_context);
+
     auto reading = std::make_unique<ReadFromStorageS3Step>(
         column_names,
         query_info,
         storage_snapshot,
         local_context,
         read_from_format_info.source_header,
+        query_configuration,
         *this,
         std::move(read_from_format_info),
         need_only_count,
         max_block_size,
-        num_streams);
+        num_streams,
+        partition_columns);
 
     query_plan.addStep(std::move(reading));
 }
@@ -1262,7 +1300,8 @@ void ReadFromStorageS3Step::initializePipeline(QueryPipelineBuilder & pipeline,
             query_configuration.url.uri.getHost() + std::to_string(query_configuration.url.uri.getPort()),
             iterator_wrapper,
             max_parsing_threads,
-            need_only_count);
+            need_only_count,
+            partition_columns);
 
         source->setKeyCondition(filter_actions_dag, context);
         pipes.emplace_back(std::move(source));
@@ -1973,8 +2012,8 @@ void registerStorageS3Impl(const String & name, StorageFactory & factory)
             return std::make_shared<StorageS3>(
                 std::move(configuration),
                 args.getContext(),
-                args.table_id,
                 args.columns,
+                args.table_id,
                 args.constraints,
                 args.comment,
                 format_settings,
diff --git a/src/Storages/StorageS3.h b/src/Storages/StorageS3.h
index c8ab28fb20e..9e1d9eb5aad 100644
--- a/src/Storages/StorageS3.h
+++ b/src/Storages/StorageS3.h
@@ -20,6 +20,7 @@
 #include
 #include
 #include
+#include <Storages/DataLakes/PartitionColumns.h>
 #include
 #include
 
@@ -141,7 +142,8 @@ public:
         const String & url_host_and_port,
         std::shared_ptr<IIterator> file_iterator_,
         size_t max_parsing_threads,
-        bool need_only_count_);
+        bool need_only_count_,
+        const DataLakePartitionColumns & partition_columns_ = {});
 
     ~StorageS3Source() override;
 
@@ -170,6 +172,7 @@ private:
     std::shared_ptr<const S3::Client> client;
     Block sample_block;
     std::optional<FormatSettings> format_settings;
+    DataLakePartitionColumns partition_columns;
 
     struct ReaderHolder
     {
@@ -305,8 +308,8 @@ public:
     StorageS3(
         const Configuration & configuration_,
         const ContextPtr & context_,
-        const StorageID & table_id_,
         const ColumnsDescription & columns_,
+        const StorageID & table_id_,
         const ConstraintsDescription & constraints_,
         const String & comment,
         std::optional<FormatSettings> format_settings_,
@@ -363,6 +366,8 @@ protected:
 
     const Configuration & getConfiguration();
 
+    mutable DataLakePartitionColumns partition_columns;
+
 private:
     friend class StorageS3Cluster;
     friend class TableFunctionS3Cluster;
diff --git a/src/TableFunctions/ITableFunction.cpp b/src/TableFunctions/ITableFunction.cpp
index 137e1dc27fe..e5676c5c25d 100644
--- a/src/TableFunctions/ITableFunction.cpp
+++ b/src/TableFunctions/ITableFunction.cpp
@@ -36,7 +36,7 @@ StoragePtr ITableFunction::execute(const ASTPtr & ast_function, ContextPtr conte
     if (cached_columns.empty())
         return executeImpl(ast_function, context, table_name, std::move(cached_columns), is_insert_query);
 
-    if (hasStaticStructure() && cached_columns == getActualTableStructure(context,is_insert_query))
+    if (hasStaticStructure() && cached_columns ==
getActualTableStructure(context, is_insert_query))
         return executeImpl(ast_function, context_to_use, table_name, std::move(cached_columns), is_insert_query);
 
     auto this_table_function = shared_from_this();
diff --git a/src/TableFunctions/ITableFunctionDataLake.h b/src/TableFunctions/ITableFunctionDataLake.h
index 91165ba6705..1d946d9b5fa 100644
--- a/src/TableFunctions/ITableFunctionDataLake.h
+++ b/src/TableFunctions/ITableFunctionDataLake.h
@@ -26,21 +26,28 @@ protected:
         const ASTPtr & /*ast_function*/,
         ContextPtr context,
         const std::string & table_name,
-        ColumnsDescription /*cached_columns*/,
+        ColumnsDescription cached_columns,
         bool /*is_insert_query*/) const override
     {
         ColumnsDescription columns;
         if (TableFunction::configuration.structure != "auto")
             columns = parseColumnsListFromString(TableFunction::configuration.structure, context);
+        else if (!structure_hint.empty())
+            columns = structure_hint;
+        else if (!cached_columns.empty())
+            columns = cached_columns;
 
         StoragePtr storage = Storage::create(
-            TableFunction::configuration, context, LoadingStrictnessLevel::CREATE, StorageID(TableFunction::getDatabaseName(), table_name),
-            columns, ConstraintsDescription{}, String{}, std::nullopt);
+            TableFunction::configuration, context, LoadingStrictnessLevel::CREATE,
+            columns, StorageID(TableFunction::getDatabaseName(), table_name),
+            ConstraintsDescription{}, String{}, std::nullopt);
 
         storage->startup();
         return storage;
     }
 
+    void setStructureHint(const ColumnsDescription & structure_hint_) override { structure_hint = structure_hint_; }
+
     const char * getStorageTypeName() const override { return Storage::name; }
 
     ColumnsDescription getActualTableStructure(ContextPtr context, bool /*is_insert_query*/) const override
@@ -60,6 +67,8 @@ protected:
             TableFunction::configuration.format = "Parquet";
         TableFunction::parseArguments(ast_function, context);
     }
+
+    ColumnsDescription structure_hint;
 };
 
 }
diff --git a/src/TableFunctions/TableFunctionS3.cpp b/src/TableFunctions/TableFunctionS3.cpp
index a8c100ebd44..93e43fb17ac 100644
--- a/src/TableFunctions/TableFunctionS3.cpp
+++ b/src/TableFunctions/TableFunctionS3.cpp
@@ -420,8 +420,8 @@ StoragePtr TableFunctionS3::executeImpl(const ASTPtr & /*ast_function*/, Context
     StoragePtr storage = std::make_shared<StorageS3>(
         configuration,
         context,
-        StorageID(getDatabaseName(), table_name),
         columns,
+        StorageID(getDatabaseName(), table_name),
         ConstraintsDescription{},
         String{},
         /// No format_settings for table function S3
diff --git a/src/TableFunctions/TableFunctionS3Cluster.cpp b/src/TableFunctions/TableFunctionS3Cluster.cpp
index e727c4e4c89..3f8ed72479b 100644
--- a/src/TableFunctions/TableFunctionS3Cluster.cpp
+++ b/src/TableFunctions/TableFunctionS3Cluster.cpp
@@ -37,8 +37,8 @@ StoragePtr TableFunctionS3Cluster::executeImpl(
         storage = std::make_shared<StorageS3Cluster>(
             configuration,
             context,
-            StorageID(getDatabaseName(), table_name),
             columns,
+            StorageID(getDatabaseName(), table_name),
             ConstraintsDescription{},
             /* comment */String{},
             /* format_settings */std::nullopt, /// No format_settings for S3Cluster
diff --git a/tests/integration/test_storage_delta/test.py b/tests/integration/test_storage_delta/test.py
index 25f0b58e0f5..c6bb8fd8d69 100644
--- a/tests/integration/test_storage_delta/test.py
+++ b/tests/integration/test_storage_delta/test.py
@@ -511,3 +511,98 @@ def test_restart_broken_table_function(started_cluster):
     upload_directory(minio_client, bucket, f"/{TABLE_NAME}", "")
 
     assert int(instance.query(f"SELECT count() FROM {TABLE_NAME}")) == 100
+
+
+def
test_partition_columns(started_cluster): + instance = started_cluster.instances["node1"] + spark = started_cluster.spark_session + minio_client = started_cluster.minio_client + bucket = started_cluster.minio_bucket + TABLE_NAME = "test_partition_columns" + result_file = f"{TABLE_NAME}" + partition_column = "c" + + delta_table = ( + DeltaTable.create(spark) + .tableName(TABLE_NAME) + .location(f"/{result_file}") + .addColumn("a", "INT") + .addColumn("b", "STRING") + .addColumn("c", "DATE") + .partitionedBy(partition_column) + .execute() + ) + num_rows = 9 + + schema = StructType( + [ + StructField("a", IntegerType()), + StructField("b", StringType()), + StructField("c", DateType()), + ] + ) + + for i in range(1, num_rows + 1): + data = [ + ( + i, + "test" + str(i), + datetime.strptime(f"2000-01-0{i}", "%Y-%m-%d"), + ) + ] + df = spark.createDataFrame(data=data, schema=schema) + df.printSchema() + df.write.mode("append").format("delta").partitionBy(partition_column).save( + f"/{TABLE_NAME}" + ) + + minio_client = started_cluster.minio_client + bucket = started_cluster.minio_bucket + + files = upload_directory(minio_client, bucket, f"/{TABLE_NAME}", "") + assert len(files) > 0 + print(f"Uploaded files: {files}") + + result = instance.query( + f"describe table deltaLake('http://{started_cluster.minio_ip}:{started_cluster.minio_port}/{bucket}/{result_file}/', 'minio', 'minio123')" + ).strip() + + assert ( + result + == "a\tNullable(Int32)\t\t\t\t\t\nb\tNullable(String)\t\t\t\t\t\nc\tNullable(Date)" + ) + + result = int( + instance.query( + f"""SELECT count() + FROM deltaLake('http://{started_cluster.minio_ip}:{started_cluster.minio_port}/{bucket}/{result_file}/', 'minio', 'minio123') + """ + ) + ) + assert result == num_rows + result = int( + instance.query( + f"""SELECT count() + FROM deltaLake('http://{started_cluster.minio_ip}:{started_cluster.minio_port}/{bucket}/{result_file}/', 'minio', 'minio123') + WHERE c == toDateTime('2000/01/05') + """ + ) + ) + assert result == 1 + + # instance.query( + # f""" + # DROP TABLE IF EXISTS {TABLE_NAME}; + # CREATE TABLE {TABLE_NAME} (a Int32, b String, c DateTime) + # ENGINE=DeltaLake('http://{started_cluster.minio_ip}:{started_cluster.minio_port}/{bucket}/{result_file}/', 'minio', 'minio123')""" + # ) + # assert ( + # int( + # instance.query( + # f"SELECT count() FROM {TABLE_NAME} WHERE c != toDateTime('2000/01/05')" + # ) + # ) + # == num_rows - 1 + # ) + # instance.query(f"SELECT a, b, c, FROM {TABLE_NAME}") + # assert False From d7de2ae0c9c37e9079ec575dc0a7ffeae5394206 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Mon, 13 May 2024 16:31:13 +0000 Subject: [PATCH 025/140] remove optimization for old analyzer --- .../RewriteAggregateFunctionWithIfPass.cpp | 26 +-- .../RewriteFunctionToSubcolumnVisitor.cpp | 216 ------------------ .../RewriteFunctionToSubcolumnVisitor.h | 52 ----- src/Interpreters/TreeOptimizer.cpp | 61 ----- .../01872_functions_to_subcolumns.reference | 47 ---- .../01872_functions_to_subcolumns.sql | 41 ---- .../0_stateless/02115_map_contains.reference | 4 - .../0_stateless/02115_map_contains.sql | 12 - .../0_stateless/02116_tuple_element.reference | 25 -- .../0_stateless/02116_tuple_element.sql | 42 ---- ...tions_to_subcolumns_column_names.reference | 5 - ...1_functions_to_subcolumns_column_names.sql | 6 - ...2971_functions_to_subcolumns_map.reference | 12 - .../02971_functions_to_subcolumns_map.sql | 12 - ..._functions_to_subcolumns_variant.reference | 4 - .../02971_functions_to_subcolumns_variant.sql | 6 - 
 .../03003_functions_to_subcolumns_final.sql   |   1 +
 17 files changed, 2 insertions(+), 570 deletions(-)
 delete mode 100644 src/Interpreters/RewriteFunctionToSubcolumnVisitor.cpp
 delete mode 100644 src/Interpreters/RewriteFunctionToSubcolumnVisitor.h
 delete mode 100644 tests/queries/0_stateless/01872_functions_to_subcolumns.reference
 delete mode 100644 tests/queries/0_stateless/01872_functions_to_subcolumns.sql
 delete mode 100644 tests/queries/0_stateless/02115_map_contains.reference
 delete mode 100644 tests/queries/0_stateless/02115_map_contains.sql
 delete mode 100644 tests/queries/0_stateless/02116_tuple_element.reference
 delete mode 100644 tests/queries/0_stateless/02116_tuple_element.sql

diff --git a/src/Analyzer/Passes/RewriteAggregateFunctionWithIfPass.cpp b/src/Analyzer/Passes/RewriteAggregateFunctionWithIfPass.cpp
index 365bc28431a..58045c935aa 100644
--- a/src/Analyzer/Passes/RewriteAggregateFunctionWithIfPass.cpp
+++ b/src/Analyzer/Passes/RewriteAggregateFunctionWithIfPass.cpp
@@ -12,10 +12,7 @@
 #include
 #include
 
-<<<<<<< HEAD
-=======
 #include
->>>>>>> upstream/master
 #include
 
 namespace DB
@@ -102,16 +99,8 @@ public:
                 FunctionFactory::instance().get("not", getContext())->build(not_function->getArgumentColumns()));
             new_arguments[1] = std::move(not_function);
 
-<<<<<<< HEAD
-            function_arguments_nodes.resize(2);
-            function_arguments_nodes[0] = std::move(if_arguments_nodes[2]);
-            function_arguments_nodes[1] = std::move(not_function);
-            resolveAsAggregateFunctionWithIf(*function_node);
-=======
             function_arguments_nodes = std::move(new_arguments);
-            resolveAsAggregateFunctionWithIf(
-                *function_node, {function_arguments_nodes[0]->getResultType(), function_arguments_nodes[1]->getResultType()});
->>>>>>> upstream/master
+            resolveAsAggregateFunctionWithIf(*function_node);
         }
     }
 
@@ -120,21 +109,8 @@ private:
     static inline void resolveAsAggregateFunctionWithIf(FunctionNode & function_node)
     {
         auto result_type = function_node.getResultType();
-<<<<<<< HEAD
         const auto * suffix = result_type->isNullable() ? "OrNullIf" : "If";
         resolveAggregateFunctionNodeByName(function_node, function_node.getFunctionName() + suffix);
-=======
-
-        AggregateFunctionProperties properties;
-        auto aggregate_function = AggregateFunctionFactory::instance().get(
-            function_node.getFunctionName() + "If",
-            function_node.getNullsAction(),
-            argument_types,
-            function_node.getAggregateFunction()->getParameters(),
-            properties);
-
-        function_node.resolveAsAggregateFunction(std::move(aggregate_function));
->>>>>>> upstream/master
     }
 };
 
diff --git a/src/Interpreters/RewriteFunctionToSubcolumnVisitor.cpp b/src/Interpreters/RewriteFunctionToSubcolumnVisitor.cpp
deleted file mode 100644
index b1c79d4ecb7..00000000000
--- a/src/Interpreters/RewriteFunctionToSubcolumnVisitor.cpp
+++ /dev/null
@@ -1,216 +0,0 @@
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-
-namespace DB
-{
-
-namespace
-{
-
-ASTPtr transformToSubcolumn(const String & name_in_storage, const String & subcolumn_name)
-{
-    return std::make_shared<ASTIdentifier>(Nested::concatenateName(name_in_storage, subcolumn_name));
-}
-
-ASTPtr transformEmptyToSubcolumn(const String & name_in_storage, const String & subcolumn_name)
-{
-    auto ast = transformToSubcolumn(name_in_storage, subcolumn_name);
-    return makeASTFunction("equals", ast, std::make_shared<ASTLiteral>(0u));
-}
-
-ASTPtr transformNotEmptyToSubcolumn(const String & name_in_storage, const String & subcolumn_name)
-{
-    auto ast = transformToSubcolumn(name_in_storage, subcolumn_name);
-    return makeASTFunction("notEquals", ast, std::make_shared<ASTLiteral>(0u));
-}
-
-ASTPtr transformIsNotNullToSubcolumn(const String & name_in_storage, const String & subcolumn_name)
-{
-    auto ast = transformToSubcolumn(name_in_storage, subcolumn_name);
-    return makeASTFunction("not", ast);
-}
-
-ASTPtr transformCountNullableToSubcolumn(const String & name_in_storage, const String & subcolumn_name)
-{
-    auto ast = transformToSubcolumn(name_in_storage, subcolumn_name);
-    return makeASTFunction("sum", makeASTFunction("not", ast));
-}
-
-const std::unordered_map<String, std::tuple<std::set<TypeIndex>, String, decltype(&transformToSubcolumn)>> unary_function_to_subcolumn =
-{
-    {"length", {{TypeIndex::Array, TypeIndex::Map}, "size0", transformToSubcolumn}},
-    {"empty", {{TypeIndex::Array, TypeIndex::Map}, "size0", transformEmptyToSubcolumn}},
-    {"notEmpty", {{TypeIndex::Array, TypeIndex::Map}, "size0", transformNotEmptyToSubcolumn}},
-    {"isNull", {{TypeIndex::Nullable}, "null", transformToSubcolumn}},
-    {"isNotNull", {{TypeIndex::Nullable}, "null", transformIsNotNullToSubcolumn}},
-    {"count", {{TypeIndex::Nullable}, "null", transformCountNullableToSubcolumn}},
-    {"mapKeys", {{TypeIndex::Map}, "keys", transformToSubcolumn}},
-    {"mapValues", {{TypeIndex::Map}, "values", transformToSubcolumn}},
-};
-
-std::optional<NameAndTypePair> getColumnFromArgumentsToOptimize(
-    const ASTs & arguments,
-    const StorageMetadataPtr & metadata_snapshot)
-{
-    if (arguments.empty() || arguments.size() > 2)
-        return {};
-
-    const auto * identifier = arguments[0]->as<ASTIdentifier>();
-    if (!identifier)
-        return {};
-
-    const auto & columns = metadata_snapshot->getColumns();
-    const auto & name_in_storage = identifier->name();
-
-    if (!columns.has(name_in_storage))
-        return {};
-
-    const auto & column_type = columns.get(name_in_storage).type;
-    if (column_type->hasDynamicSubcolumns())
-        return {};
-
-    return NameAndTypePair{name_in_storage, column_type};
-}
-
-}
-
-void RewriteFunctionToSubcolumnFirstPassMatcher::visit(const ASTPtr & ast, Data & data)
-{
-    if (const auto * identifier = ast->as<ASTIdentifier>())
-    {
-
++data.indentifiers_count[identifier->name()];
-        return;
-    }
-
-    if (const auto * function = ast->as<ASTFunction>())
-    {
-        visit(*function, data);
-        return;
-    }
-}
-
-void RewriteFunctionToSubcolumnFirstPassMatcher::visit(const ASTFunction & function, Data & data)
-{
-    const auto & arguments = function.arguments->children;
-    auto column = getColumnFromArgumentsToOptimize(arguments, data.metadata_snapshot);
-    if (!column)
-        return;
-
-    auto column_type_id = column->type->getTypeId();
-
-    if (arguments.size() == 1)
-    {
-        auto it = unary_function_to_subcolumn.find(function.name);
-        if (it == unary_function_to_subcolumn.end())
-            return;
-
-        const auto & expected_types_id = std::get<0>(it->second);
-        if (expected_types_id.contains(column_type_id))
-            ++data.optimized_identifiers_count[column->name];
-    }
-    else if (arguments.size() == 2)
-    {
-        if (function.name == "tupleElement" && column_type_id == TypeIndex::Tuple)
-        {
-            const auto * literal = arguments[1]->as<ASTLiteral>();
-            if (!literal)
-                return;
-
-            auto value_type = literal->value.getType();
-            if (value_type == Field::Types::UInt64 || value_type == Field::Types::String)
-                ++data.optimized_identifiers_count[column->name];
-        }
-        else if (function.name == "variantElement" && column_type_id == TypeIndex::Variant)
-        {
-            const auto * literal = arguments[1]->as<ASTLiteral>();
-            if (literal && literal->value.getType() == Field::Types::String)
-                ++data.optimized_identifiers_count[column->name];
-        }
-        else if (function.name == "mapContains" && column_type_id == TypeIndex::Map)
-        {
-            ++data.optimized_identifiers_count[column->name];
-        }
-    }
-}
-
-void RewriteFunctionToSubcolumnSecondPassData::visit(ASTFunction & function, ASTPtr & ast) const
-{
-    const auto & arguments = function.arguments->children;
-    auto column = getColumnFromArgumentsToOptimize(arguments, metadata_snapshot);
-    if (!column)
-        return;
-
-    auto column_type_id = column->type->getTypeId();
-    auto alias = function.getAliasOrColumnName();
-
-    if (arguments.size() == 1)
-    {
-        auto it = unary_function_to_subcolumn.find(function.name);
-        if (it == unary_function_to_subcolumn.end())
-            return;
-
-        const auto & [expected_types_id, subcolumn_name, transformer] = it->second;
-        if (!expected_types_id.contains(column_type_id))
-            return;
-
-        ast = transformer(column->name, subcolumn_name);
-        ast->setAlias(alias);
-    }
-    else if (arguments.size() == 2)
-    {
-        if (function.name == "tupleElement" && column_type_id == TypeIndex::Tuple)
-        {
-            const auto * literal = arguments[1]->as<ASTLiteral>();
-            if (!literal)
-                return;
-
-            String subcolumn_name;
-            auto value_type = literal->value.getType();
-            if (value_type == Field::Types::UInt64)
-            {
-                const auto & type_tuple = assert_cast<const DataTypeTuple &>(*column->type);
-                auto index = literal->value.get<UInt64>();
-                subcolumn_name = type_tuple.getNameByPosition(index);
-            }
-            else if (value_type == Field::Types::String)
-            {
-                subcolumn_name = literal->value.get<String>();
-            }
-            else
-            {
-                return;
-            }
-
-            ast = transformToSubcolumn(column->name, subcolumn_name);
-            ast->setAlias(alias);
-        }
-        else if (function.name == "variantElement" && column_type_id == TypeIndex::Variant)
-        {
-            const auto * literal = arguments[1]->as<ASTLiteral>();
-            if (!literal)
-                return;
-
-            String subcolumn_name;
-            auto value_type = literal->value.getType();
-            if (value_type != Field::Types::String)
-                return;
-
-            subcolumn_name = literal->value.get<String>();
-            ast = transformToSubcolumn(column->name, subcolumn_name);
-            ast->setAlias(alias);
-        }
-        else if (function.name == "mapContains" && column_type_id == TypeIndex::Map)
-        {
-            auto subcolumn = transformToSubcolumn(column->name, "keys");
-
ast = makeASTFunction("has", subcolumn, arguments[1]);
-            ast->setAlias(alias);
-        }
-    }
-}
-
-}
diff --git a/src/Interpreters/RewriteFunctionToSubcolumnVisitor.h b/src/Interpreters/RewriteFunctionToSubcolumnVisitor.h
deleted file mode 100644
index 08eb6e27c52..00000000000
--- a/src/Interpreters/RewriteFunctionToSubcolumnVisitor.h
+++ /dev/null
@@ -1,52 +0,0 @@
-#pragma once
-
-#include
-#include
-
-namespace DB
-{
-
-class ASTFunction;
-class ASTIdentifier;
-
-/// Collects info about identifiers to select columns to optimize to subcolumns.
-class RewriteFunctionToSubcolumnFirstPassMatcher
-{
-public:
-    struct Data
-    {
-        explicit Data(StorageMetadataPtr metadata_snapshot_) : metadata_snapshot(std::move(metadata_snapshot_)) {}
-
-        StorageMetadataPtr metadata_snapshot;
-        std::unordered_map<String, UInt64> indentifiers_count;
-        std::unordered_map<String, UInt64> optimized_identifiers_count;
-    };
-
-    static void visit(const ASTPtr & ast, Data & data);
-    static void visit(const ASTFunction & function, Data & data);
-    static bool needChildVisit(ASTPtr & , ASTPtr &) { return true; }
-};
-
-using RewriteFunctionToSubcolumnFirstPassVisitor = InDepthNodeVisitor<RewriteFunctionToSubcolumnFirstPassMatcher, true>;
-
-/// Rewrites functions to subcolumns, if possible, to reduce amount of read data.
-/// E.g. 'length(arr)' -> 'arr.size0', 'col IS NULL' -> 'col.null'
-class RewriteFunctionToSubcolumnSecondPassData
-{
-public:
-    using TypeToVisit = ASTFunction;
-    void visit(ASTFunction & function, ASTPtr & ast) const;
-
-    RewriteFunctionToSubcolumnSecondPassData(StorageMetadataPtr metadata_snapshot_, NameSet identifiers_to_optimize_)
-        : metadata_snapshot(std::move(metadata_snapshot_)), identifiers_to_optimize(std::move(identifiers_to_optimize_))
-    {
-    }
-
-    StorageMetadataPtr metadata_snapshot;
-    NameSet identifiers_to_optimize;
-};
-
-using RewriteFunctionToSubcolumnSecondPassMatcher = OneTypeMatcher<RewriteFunctionToSubcolumnSecondPassData>;
-using RewriteFunctionToSubcolumnSecondPassVisitor = InDepthNodeVisitor<RewriteFunctionToSubcolumnSecondPassMatcher, true>;
-
-}
diff --git a/src/Interpreters/TreeOptimizer.cpp b/src/Interpreters/TreeOptimizer.cpp
index d01a922bfd0..b88d75cd5a2 100644
--- a/src/Interpreters/TreeOptimizer.cpp
+++ b/src/Interpreters/TreeOptimizer.cpp
@@ -17,7 +17,6 @@
 #include
 #include
 #include
-#include
 #include
 #include
 #include
@@ -564,63 +563,6 @@ void transformIfStringsIntoEnum(ASTPtr & query)
     ConvertStringsToEnumVisitor(convert_data).visit(query);
 }
 
-void optimizeFunctionsToSubcolumns(ASTPtr & query, const TreeRewriterResult & result)
-{
-    if (!result.storage || !result.storage->supportsOptimizationToSubcolumns() || !result.storage_snapshot)
-        return;
-
-    const auto & metadata_snapshot = result.storage_snapshot->metadata;
-    const auto & select_query = assert_cast<const ASTSelectQuery &>(*query);
-
-    /// For queries with FINAL converting function to subcolumn may alter
-    /// special merging algorithms and produce wrong result of query.
-    if (select_query.final())
-        return;
-
-    NameSet all_key_columns;
-
-    const auto & primary_key_columns = result.storage_snapshot->metadata->getColumnsRequiredForPrimaryKey();
-    all_key_columns.insert(primary_key_columns.begin(), primary_key_columns.end());
-
-    const auto & partition_key_columns = metadata_snapshot->getColumnsRequiredForPartitionKey();
-    all_key_columns.insert(partition_key_columns.begin(), partition_key_columns.end());
-
-    for (const auto & index : metadata_snapshot->getSecondaryIndices())
-    {
-        const auto & index_columns = index.expression->getRequiredColumns();
-        all_key_columns.insert(index_columns.begin(), index_columns.end());
-    }
-
-    /// Do not optimize if full column is requested in other context.
- /// It doesn't make sense because it doesn't reduce amount of read data - /// and optimized functions are not computation heavy. But introducing - /// new identifier complicates query analysis and may break it. - /// - /// E.g. query: - /// SELECT n FROM table GROUP BY n HAVING isNotNull(n) - /// may be optimized to incorrect query: - /// SELECT n FROM table GROUP BY n HAVING not(n.null) - /// Will produce: `n.null` is not under aggregate function and not in GROUP BY keys) - /// - /// Do not optimize index columns (primary, min-max, secondary), - /// because otherwise analysis of indexes may be broken. - /// TODO: handle subcolumns in index analysis. - - RewriteFunctionToSubcolumnFirstPassVisitor::Data data(metadata_snapshot); - RewriteFunctionToSubcolumnFirstPassVisitor(data).visit(query); - - NameSet identifiers_to_optimize; - for (const auto & [identifier, count] : data.optimized_identifiers_count) - if (!all_key_columns.contains(identifier) && data.indentifiers_count[identifier] == count) - identifiers_to_optimize.insert(identifier); - - if (identifiers_to_optimize.empty()) - return; - - RewriteFunctionToSubcolumnSecondPassVisitor::Data rewrite_data(metadata_snapshot, identifiers_to_optimize); - RewriteFunctionToSubcolumnSecondPassVisitor(rewrite_data).visit(query); -} - void optimizeOrLikeChain(ASTPtr & query) { ConvertFunctionOrLikeVisitor::Data data = {}; @@ -685,9 +627,6 @@ void TreeOptimizer::apply(ASTPtr & query, TreeRewriterResult & result, if (!select_query) throw Exception(ErrorCodes::LOGICAL_ERROR, "Select analyze for not select asts."); - if (settings.optimize_functions_to_subcolumns) - optimizeFunctionsToSubcolumns(query, result); - /// Move arithmetic operations out of aggregation functions if (settings.optimize_arithmetic_operations_in_aggregate_functions) optimizeAggregationFunctions(query); diff --git a/tests/queries/0_stateless/01872_functions_to_subcolumns.reference b/tests/queries/0_stateless/01872_functions_to_subcolumns.reference deleted file mode 100644 index 8c4017d6030..00000000000 --- a/tests/queries/0_stateless/01872_functions_to_subcolumns.reference +++ /dev/null @@ -1,47 +0,0 @@ -0 0 1 -0 1 0 -SELECT - id IS NULL, - `n.null` AS `isNull(n)`, - NOT `n.null` AS `isNotNull(n)` -FROM t_func_to_subcolumns -3 0 1 0 -0 1 0 \N -SELECT - `arr.size0` AS `length(arr)`, - `arr.size0` = 0 AS `empty(arr)`, - `arr.size0` != 0 AS `notEmpty(arr)`, - empty(n) -FROM t_func_to_subcolumns -['foo','bar'] [1,2] -[] [] -SELECT - `m.keys` AS `mapKeys(m)`, - `m.values` AS `mapValues(m)` -FROM t_func_to_subcolumns -1 -SELECT sum(NOT `n.null`) AS `count(n)` -FROM t_func_to_subcolumns -2 -SELECT count(id) -FROM t_func_to_subcolumns -1 0 0 -2 1 0 -3 0 0 -SELECT - id, - `n.null` AS `isNull(n)`, - right.n IS NULL -FROM t_func_to_subcolumns AS left -ALL FULL OUTER JOIN -( - SELECT - 1 AS id, - \'qqq\' AS n - UNION ALL - SELECT - 3 AS id, - \'www\' -) AS right USING (id) -0 10 -0 20 diff --git a/tests/queries/0_stateless/01872_functions_to_subcolumns.sql b/tests/queries/0_stateless/01872_functions_to_subcolumns.sql deleted file mode 100644 index 45f83bf20e5..00000000000 --- a/tests/queries/0_stateless/01872_functions_to_subcolumns.sql +++ /dev/null @@ -1,41 +0,0 @@ -DROP TABLE IF EXISTS t_func_to_subcolumns; - -SET optimize_functions_to_subcolumns = 1; - -CREATE TABLE t_func_to_subcolumns (id UInt64, arr Array(UInt64), n Nullable(String), m Map(String, UInt64)) -ENGINE = MergeTree ORDER BY tuple(); - -INSERT INTO t_func_to_subcolumns VALUES (1, [1, 2, 3], 'abc', map('foo', 1, 'bar', 2)) (2, 
[], NULL, map()); - -SELECT id IS NULL, n IS NULL, n IS NOT NULL FROM t_func_to_subcolumns; -EXPLAIN SYNTAX SELECT id IS NULL, n IS NULL, n IS NOT NULL FROM t_func_to_subcolumns; - -SELECT length(arr), empty(arr), notEmpty(arr), empty(n) FROM t_func_to_subcolumns; -EXPLAIN SYNTAX SELECT length(arr), empty(arr), notEmpty(arr), empty(n) FROM t_func_to_subcolumns; - -SELECT mapKeys(m), mapValues(m) FROM t_func_to_subcolumns; -EXPLAIN SYNTAX SELECT mapKeys(m), mapValues(m) FROM t_func_to_subcolumns; - -SELECT count(n) FROM t_func_to_subcolumns; -EXPLAIN SYNTAX SELECT count(n) FROM t_func_to_subcolumns; - -SELECT count(id) FROM t_func_to_subcolumns; -EXPLAIN SYNTAX SELECT count(id) FROM t_func_to_subcolumns; - -SELECT id, left.n IS NULL, right.n IS NULL FROM t_func_to_subcolumns AS left -FULL JOIN (SELECT 1 AS id, 'qqq' AS n UNION ALL SELECT 3 AS id, 'www') AS right USING(id); - -EXPLAIN SYNTAX SELECT id, left.n IS NULL, right.n IS NULL FROM t_func_to_subcolumns AS left -FULL JOIN (SELECT 1 AS id, 'qqq' AS n UNION ALL SELECT 3 AS id, 'www') AS right USING(id); - -DROP TABLE t_func_to_subcolumns; - -DROP TABLE IF EXISTS t_tuple_null; - -CREATE TABLE t_tuple_null (t Tuple(null UInt32)) ENGINE = MergeTree ORDER BY tuple(); - -INSERT INTO t_tuple_null VALUES ((10)), ((20)); - -SELECT t IS NULL, t.null FROM t_tuple_null; - -DROP TABLE t_tuple_null; diff --git a/tests/queries/0_stateless/02115_map_contains.reference b/tests/queries/0_stateless/02115_map_contains.reference deleted file mode 100644 index e4ae4f951ba..00000000000 --- a/tests/queries/0_stateless/02115_map_contains.reference +++ /dev/null @@ -1,4 +0,0 @@ -SELECT has(`m.keys`, \'a\') AS `mapContains(m, \'a\')` -FROM t_map_contains -1 -0 diff --git a/tests/queries/0_stateless/02115_map_contains.sql b/tests/queries/0_stateless/02115_map_contains.sql deleted file mode 100644 index 3c7f21cb4f1..00000000000 --- a/tests/queries/0_stateless/02115_map_contains.sql +++ /dev/null @@ -1,12 +0,0 @@ -DROP TABLE IF EXISTS t_map_contains; - -CREATE TABLE t_map_contains (m Map(String, UInt32)) ENGINE = Memory; - -INSERT INTO t_map_contains VALUES (map('a', 1, 'b', 2)), (map('c', 3, 'd', 4)); - -SET optimize_functions_to_subcolumns = 1; - -EXPLAIN SYNTAX SELECT mapContains(m, 'a') FROM t_map_contains; -SELECT mapContains(m, 'a') FROM t_map_contains; - -DROP TABLE t_map_contains; diff --git a/tests/queries/0_stateless/02116_tuple_element.reference b/tests/queries/0_stateless/02116_tuple_element.reference deleted file mode 100644 index a8004f5e74c..00000000000 --- a/tests/queries/0_stateless/02116_tuple_element.reference +++ /dev/null @@ -1,25 +0,0 @@ -1 -SELECT `t1.a` AS `tupleElement(t1, 1)` -FROM t_tuple_element -a -SELECT `t1.s` AS `tupleElement(t1, 2)` -FROM t_tuple_element -1 -SELECT `t1.a` AS `tupleElement(t1, \'a\')` -FROM t_tuple_element -2 -SELECT `t2.1` AS `tupleElement(t2, 1)` -FROM t_tuple_element -2 -SELECT `t2.1` AS `tupleElement(t2, 1)` -FROM t_tuple_element -1 2 -WITH (1, 2) AS t -SELECT - t.1, - t.2 -1 2 -WITH CAST(\'(1, 2)\', \'Tuple(a UInt32, b UInt32)\') AS t -SELECT - t.1, - tupleElement(t, \'b\') diff --git a/tests/queries/0_stateless/02116_tuple_element.sql b/tests/queries/0_stateless/02116_tuple_element.sql deleted file mode 100644 index e3a5134f2b2..00000000000 --- a/tests/queries/0_stateless/02116_tuple_element.sql +++ /dev/null @@ -1,42 +0,0 @@ -DROP TABLE IF EXISTS t_tuple_element; - -CREATE TABLE t_tuple_element(t1 Tuple(a UInt32, s String), t2 Tuple(UInt32, String)) ENGINE = Memory; -INSERT INTO t_tuple_element VALUES ((1, 
'a'), (2, 'b')); - -SET optimize_functions_to_subcolumns = 1; - -SELECT t1.1 FROM t_tuple_element; -EXPLAIN SYNTAX SELECT t1.1 FROM t_tuple_element; - -SELECT tupleElement(t1, 2) FROM t_tuple_element; -EXPLAIN SYNTAX SELECT tupleElement(t1, 2) FROM t_tuple_element; - -SELECT tupleElement(t1, 'a') FROM t_tuple_element; -EXPLAIN SYNTAX SELECT tupleElement(t1, 'a') FROM t_tuple_element; - -SELECT tupleElement(number, 1) FROM numbers(1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } -SELECT tupleElement(t1) FROM t_tuple_element; -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } -SELECT tupleElement(t1, 'b') FROM t_tuple_element; -- { serverError NOT_FOUND_COLUMN_IN_BLOCK, UNKNOWN_IDENTIFIER } -SELECT tupleElement(t1, 0) FROM t_tuple_element; -- { serverError ARGUMENT_OUT_OF_BOUND, NOT_FOUND_COLUMN_IN_BLOCK } -SELECT tupleElement(t1, 3) FROM t_tuple_element; -- { serverError ARGUMENT_OUT_OF_BOUND, NOT_FOUND_COLUMN_IN_BLOCK } -SELECT tupleElement(t1, materialize('a')) FROM t_tuple_element; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT, NOT_FOUND_COLUMN_IN_BLOCK } - -SELECT t2.1 FROM t_tuple_element; -EXPLAIN SYNTAX SELECT t2.1 FROM t_tuple_element; - -SELECT tupleElement(t2, 1) FROM t_tuple_element; -EXPLAIN SYNTAX SELECT tupleElement(t2, 1) FROM t_tuple_element; - -SELECT tupleElement(t2) FROM t_tuple_element; -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } -SELECT tupleElement(t2, 'a') FROM t_tuple_element; -- { serverError NOT_FOUND_COLUMN_IN_BLOCK, UNKNOWN_IDENTIFIER } -SELECT tupleElement(t2, 0) FROM t_tuple_element; -- { serverError ARGUMENT_OUT_OF_BOUND, NOT_FOUND_COLUMN_IN_BLOCK } -SELECT tupleElement(t2, 3) FROM t_tuple_element; -- { serverError ARGUMENT_OUT_OF_BOUND, NOT_FOUND_COLUMN_IN_BLOCK } -SELECT tupleElement(t2, materialize(1)) FROM t_tuple_element; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT, NOT_FOUND_COLUMN_IN_BLOCK } - -DROP TABLE t_tuple_element; - -WITH (1, 2) AS t SELECT t.1, t.2; -EXPLAIN SYNTAX WITH (1, 2) AS t SELECT t.1, t.2; - -WITH (1, 2)::Tuple(a UInt32, b UInt32) AS t SELECT t.1, tupleElement(t, 'b'); -EXPLAIN SYNTAX WITH (1, 2)::Tuple(a UInt32, b UInt32) AS t SELECT t.1, tupleElement(t, 'b'); diff --git a/tests/queries/0_stateless/02971_functions_to_subcolumns_column_names.reference b/tests/queries/0_stateless/02971_functions_to_subcolumns_column_names.reference index 03c16267db1..3389ea44074 100644 --- a/tests/queries/0_stateless/02971_functions_to_subcolumns_column_names.reference +++ b/tests/queries/0_stateless/02971_functions_to_subcolumns_column_names.reference @@ -1,8 +1,3 @@ -SELECT - `arr.size0` AS `length(arr)`, - `n.null` AS `isNull(n)` -FROM t_column_names -{"length(arr)":"3","isNull(n)":0} SELECT __table1.`arr.size0` AS `length(arr)`, __table1.`n.null` AS `isNull(n)` diff --git a/tests/queries/0_stateless/02971_functions_to_subcolumns_column_names.sql b/tests/queries/0_stateless/02971_functions_to_subcolumns_column_names.sql index b867148c8ca..48e5232d18b 100644 --- a/tests/queries/0_stateless/02971_functions_to_subcolumns_column_names.sql +++ b/tests/queries/0_stateless/02971_functions_to_subcolumns_column_names.sql @@ -4,12 +4,6 @@ CREATE TABLE t_column_names (arr Array(UInt64), n Nullable(String)) ENGINE = Mem INSERT INTO t_column_names VALUES ([1, 2, 3], 'foo'); -SET optimize_functions_to_subcolumns = 1; -SET allow_experimental_analyzer = 0; - -EXPLAIN SYNTAX SELECT length(arr), isNull(n) FROM t_column_names; -SELECT length(arr), isNull(n) FROM t_column_names FORMAT JSONEachRow; - SET optimize_functions_to_subcolumns = 1; SET 
allow_experimental_analyzer = 1; diff --git a/tests/queries/0_stateless/02971_functions_to_subcolumns_map.reference b/tests/queries/0_stateless/02971_functions_to_subcolumns_map.reference index 50f21842ac1..9488291c8ff 100644 --- a/tests/queries/0_stateless/02971_functions_to_subcolumns_map.reference +++ b/tests/queries/0_stateless/02971_functions_to_subcolumns_map.reference @@ -1,15 +1,3 @@ -SELECT `m.size0` AS `length(m)` -FROM t_func_to_subcolumns_map -2 -1 -SELECT `m.size0` = 0 AS `empty(m)` -FROM t_func_to_subcolumns_map -0 -0 -SELECT `m.size0` != 0 AS `notEmpty(m)` -FROM t_func_to_subcolumns_map -1 -1 SELECT __table1.`m.size0` AS `length(m)` FROM default.t_func_to_subcolumns_map AS __table1 2 diff --git a/tests/queries/0_stateless/02971_functions_to_subcolumns_map.sql b/tests/queries/0_stateless/02971_functions_to_subcolumns_map.sql index c574e1033c0..e8a752a82d5 100644 --- a/tests/queries/0_stateless/02971_functions_to_subcolumns_map.sql +++ b/tests/queries/0_stateless/02971_functions_to_subcolumns_map.sql @@ -4,18 +4,6 @@ CREATE TABLE t_func_to_subcolumns_map (id UInt64, m Map(String, UInt64)) ENGINE INSERT INTO t_func_to_subcolumns_map VALUES (1, map('aaa', 1, 'bbb', 2)) (2, map('ccc', 3)); -SET optimize_functions_to_subcolumns = 1; -SET allow_experimental_analyzer = 0; - -EXPLAIN SYNTAX SELECT length(m) FROM t_func_to_subcolumns_map; -SELECT length(m) FROM t_func_to_subcolumns_map; - -EXPLAIN SYNTAX SELECT empty(m) FROM t_func_to_subcolumns_map; -SELECT empty(m) FROM t_func_to_subcolumns_map; - -EXPLAIN SYNTAX SELECT notEmpty(m) FROM t_func_to_subcolumns_map; -SELECT notEmpty(m) FROM t_func_to_subcolumns_map; - SET optimize_functions_to_subcolumns = 1; SET allow_experimental_analyzer = 1; diff --git a/tests/queries/0_stateless/02971_functions_to_subcolumns_variant.reference b/tests/queries/0_stateless/02971_functions_to_subcolumns_variant.reference index 7a52155fc2d..04616738a15 100644 --- a/tests/queries/0_stateless/02971_functions_to_subcolumns_variant.reference +++ b/tests/queries/0_stateless/02971_functions_to_subcolumns_variant.reference @@ -1,7 +1,3 @@ -SELECT `v.String` AS `variantElement(v, \'String\')` -FROM t_func_to_subcolumns_variant -foo -\N SELECT __table1.`v.String` AS `variantElement(v, \'String\')` FROM default.t_func_to_subcolumns_variant AS __table1 foo diff --git a/tests/queries/0_stateless/02971_functions_to_subcolumns_variant.sql b/tests/queries/0_stateless/02971_functions_to_subcolumns_variant.sql index 1cedd877289..511bcc44514 100644 --- a/tests/queries/0_stateless/02971_functions_to_subcolumns_variant.sql +++ b/tests/queries/0_stateless/02971_functions_to_subcolumns_variant.sql @@ -6,12 +6,6 @@ CREATE TABLE t_func_to_subcolumns_variant (id UInt64, v Variant(String, UInt64)) INSERT INTO t_func_to_subcolumns_variant VALUES (1, 'foo') (2, 111); -SET optimize_functions_to_subcolumns = 1; -SET allow_experimental_analyzer = 0; - -EXPLAIN SYNTAX SELECT variantElement(v, 'String') FROM t_func_to_subcolumns_variant; -SELECT variantElement(v, 'String') FROM t_func_to_subcolumns_variant; - SET optimize_functions_to_subcolumns = 1; SET allow_experimental_analyzer = 1; diff --git a/tests/queries/0_stateless/03003_functions_to_subcolumns_final.sql b/tests/queries/0_stateless/03003_functions_to_subcolumns_final.sql index 5975347ad09..3fe29139c5f 100644 --- a/tests/queries/0_stateless/03003_functions_to_subcolumns_final.sql +++ b/tests/queries/0_stateless/03003_functions_to_subcolumns_final.sql @@ -1,6 +1,7 @@ DROP TABLE IF EXISTS t_length_1; DROP TABLE IF EXISTS 
t_length_2;
 
+SET optimize_functions_to_subcolumns = 1;
 SET allow_experimental_analyzer = 1;
 SET optimize_on_insert = 0;
 

From a8f0cfe580291c8612c43e5396693fe9cd4d463e Mon Sep 17 00:00:00 2001
From: kssenii
Date: Mon, 13 May 2024 19:54:29 +0200
Subject: [PATCH 026/140] Fixes

---
 .../DataLakes/DeltaLakeMetadataParser.cpp     | 76 ++++++++++++-------
 .../DataLakes/DeltaLakeMetadataParser.h       |  3 +
 src/Storages/DataLakes/HudiMetadataParser.h   |  3 +
 src/Storages/DataLakes/IStorageDataLake.h     | 40 ++++++++--
 src/Storages/StorageS3.cpp                    | 12 ++-
 src/Storages/StorageS3.h                      |  6 ++
 tests/integration/test_storage_delta/test.py  | 16 ++--
 7 files changed, 116 insertions(+), 40 deletions(-)

diff --git a/src/Storages/DataLakes/DeltaLakeMetadataParser.cpp b/src/Storages/DataLakes/DeltaLakeMetadataParser.cpp
index 50b6ca83cb4..30a617f3460 100644
--- a/src/Storages/DataLakes/DeltaLakeMetadataParser.cpp
+++ b/src/Storages/DataLakes/DeltaLakeMetadataParser.cpp
@@ -44,6 +44,7 @@ namespace ErrorCodes
     extern const int INCORRECT_DATA;
     extern const int LOGICAL_ERROR;
     extern const int BAD_ARGUMENTS;
+    extern const int NOT_IMPLEMENTED;
 }

 template <typename Configuration, typename MetadataReadHelper>
@@ -184,6 +185,10 @@ struct DeltaLakeMetadataParser<Configuration, MetadataReadHelper>::Impl
         Poco::Dynamic::Var json = parser.parse(json_str);
         Poco::JSON::Object::Ptr object = json.extract<Poco::JSON::Object::Ptr>();
 
+        std::ostringstream oss; // STYLE_CHECK_ALLOW_STD_STRING_STREAM
+        object->stringify(oss);
+        LOG_TEST(log, "Metadata: {}", oss.str());
+
         if (object->has("add"))
         {
             auto add_object = object->get("add").extract<Poco::JSON::Object::Ptr>();
@@ -218,34 +223,49 @@
             auto path = object->get("remove").extract<Poco::JSON::Object::Ptr>()->getValue<String>("path");
             result.erase(fs::path(configuration.getPath()) / path);
         }
-        if (file_schema.empty())
+        if (object->has("metaData"))
         {
-            // std::ostringstream oss; // STYLE_CHECK_ALLOW_STD_STRING_STREAM
-            // object->stringify(oss);
-            // LOG_TEST(log, "Metadata: {}", oss.str());
+            const auto metadata_object = object->get("metaData").extract<Poco::JSON::Object::Ptr>();
+            const auto schema_object = metadata_object->getValue<String>("schemaString");
 
-            if (object->has("metaData"))
+            Poco::JSON::Parser p;
+            Poco::Dynamic::Var fields_json = parser.parse(schema_object);
+            Poco::JSON::Object::Ptr fields_object = fields_json.extract<Poco::JSON::Object::Ptr>();
+
+            const auto fields = fields_object->get("fields").extract<Poco::JSON::Array::Ptr>();
+            NamesAndTypesList current_schema;
+            for (size_t i = 0; i < fields->size(); ++i)
             {
-                const auto metadata_object = object->get("metaData").extract<Poco::JSON::Object::Ptr>();
-                const auto schema_object = metadata_object->getValue<String>("schemaString");
+                const auto field = fields->getObject(static_cast<UInt32>(i));
+                auto name = field->getValue<String>("name");
+                auto type = field->getValue<String>("type");
+                auto is_nullable = field->getValue<bool>("nullable");
 
-                Poco::JSON::Parser p;
-                Poco::Dynamic::Var fields_json = parser.parse(schema_object);
-                Poco::JSON::Object::Ptr fields_object = fields_json.extract<Poco::JSON::Object::Ptr>();
+                std::string physical_name;
+                auto schema_metadata_object = field->get("metadata").extract<Poco::JSON::Object::Ptr>();
+                if (schema_metadata_object->has("delta.columnMapping.physicalName"))
+                    physical_name = schema_metadata_object->getValue<String>("delta.columnMapping.physicalName");
+                else
+                    physical_name = name;
 
-                const auto fields = fields_object->get("fields").extract<Poco::JSON::Array::Ptr>();
-                for (size_t i = 0; i < fields->size(); ++i)
-                {
-                    const auto field = fields->getObject(static_cast<UInt32>(i));
-                    auto name = field->getValue<String>("name");
-                    auto type = field->getValue<String>("type");
-                    auto is_nullable = field->getValue<bool>("nullable");
+                LOG_TEST(log, "Found column: {}, type: {}, nullable: {}, physical name: {}",
+                         name, type, is_nullable, physical_name);
 
-
file_schema.push_back({name, getFieldType(field, "type", is_nullable)});
-                }
+                current_schema.push_back({physical_name, getFieldType(field, "type", is_nullable)});
+            }
+
+            if (file_schema.empty())
+            {
+                file_schema = current_schema;
+            }
+            else if (file_schema != current_schema)
+            {
+                throw Exception(ErrorCodes::NOT_IMPLEMENTED,
+                                "Reading from files with different schema is not possible "
+                                "({} is different from {})",
+                                file_schema.toString(), current_schema.toString());
             }
         }
-        /// TODO: Check if schema in each file is the same?
     }
 }
 
@@ -278,12 +298,20 @@ struct DeltaLakeMetadataParser<Configuration, MetadataReadHelper>::Impl
             return value;
         else if (which.isInt8())
             return parse<Int8>(value);
+        else if (which.isUInt8())
+            return parse<UInt8>(value);
         else if (which.isInt16())
             return parse<Int16>(value);
+        else if (which.isUInt16())
+            return parse<UInt16>(value);
         else if (which.isInt32())
             return parse<Int32>(value);
+        else if (which.isUInt32())
+            return parse<UInt32>(value);
         else if (which.isInt64())
             return parse<Int64>(value);
+        else if (which.isUInt64())
+            return parse<UInt64>(value);
         else if (which.isFloat32())
             return parse<Float32>(value);
         else if (which.isFloat64())
            return parse<Float64>(value);
@@ -299,14 +327,6 @@ struct DeltaLakeMetadataParser<Configuration, MetadataReadHelper>::Impl
             readDateTime64Text(time, 6, in, assert_cast<const DataTypeDateTime64 *>(data_type.get())->getTimeZone());
             return time;
         }
-        // else if (which.isDecimal32())
-        //     return parse<Decimal32>(value);
-        // else if (which.isDecimal64())
-        //     return parse<Decimal64>(value);
-        // else if (which.isDecimal128())
-        //     return parse<Decimal128>(value);
-        // else if (which.isDecimal256())
-        //     return parse<Decimal256>(value);
 
         throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unsupported DeltaLake type for {}", check_type->getColumnType());
     }
diff --git a/src/Storages/DataLakes/DeltaLakeMetadataParser.h b/src/Storages/DataLakes/DeltaLakeMetadataParser.h
index 58cf7acd2a3..701f15a8a9f 100644
--- a/src/Storages/DataLakes/DeltaLakeMetadataParser.h
+++ b/src/Storages/DataLakes/DeltaLakeMetadataParser.h
@@ -20,10 +20,13 @@ public:
 
     DataLakePartitionColumns getPartitionColumns() const { return partition_columns; }
 
+    const std::unordered_map<String, String> & getColumnNameToPhysicalNameMapping() const { return column_name_to_physical_name; }
+
 private:
     struct Impl;
     std::shared_ptr<Impl> impl;
     NamesAndTypesList schema;
+    std::unordered_map<String, String> column_name_to_physical_name;
     DataLakePartitionColumns partition_columns;
     Strings data_files;
 };
diff --git a/src/Storages/DataLakes/HudiMetadataParser.h b/src/Storages/DataLakes/HudiMetadataParser.h
index 9e2901a9d24..3f8631dba77 100644
--- a/src/Storages/DataLakes/HudiMetadataParser.h
+++ b/src/Storages/DataLakes/HudiMetadataParser.h
@@ -20,10 +20,13 @@ public:
 
     DataLakePartitionColumns getPartitionColumns() const { return {}; }
 
+    const std::unordered_map<String, String> & getColumnNameToPhysicalNameMapping() const { return column_name_to_physical_name; }
+
 private:
     struct Impl;
     std::shared_ptr<Impl> impl;
     Strings data_files;
+    std::unordered_map<String, String> column_name_to_physical_name;
 };
 
 }
diff --git a/src/Storages/DataLakes/IStorageDataLake.h b/src/Storages/DataLakes/IStorageDataLake.h
index be23c017043..b0c5af1a6e4 100644
--- a/src/Storages/DataLakes/IStorageDataLake.h
+++ b/src/Storages/DataLakes/IStorageDataLake.h
@@ -9,6 +9,7 @@
 #include
 #include
 #include
+#include
 #include "PartitionColumns.h"
 
 #include
 
@@ -32,11 +33,13 @@ public:
         Args && ...args)
     {
         std::unique_ptr<MetadataParser> metadata;
-        Configuration read_configuration;
         Configuration base_configuration{configuration_};
+        base_configuration.update(context_);
+        Configuration read_configuration{base_configuration};
+
         try
         {
-            base_configuration.update(context_);
             metadata = std::make_unique<MetadataParser>(base_configuration, context_);
            read_configuration =
getConfigurationForDataRead(*metadata, base_configuration, context_); } @@ -124,23 +127,48 @@ private: return configuration; } + ReadFromFormatInfo prepareReadingFromFormat( + const Strings & requested_columns, + const StorageSnapshotPtr & storage_snapshot, + bool supports_subset_of_columns, + ContextPtr local_context) override + { + auto info = DB::prepareReadingFromFormat(requested_columns, storage_snapshot, supports_subset_of_columns); + if (!metadata) + { + base_configuration.update(local_context); + metadata = std::make_unique(base_configuration, local_context); + } + auto column_mapping = metadata->getColumnNameToPhysicalNameMapping(); + if (!column_mapping.empty()) + { + for (const auto & [column_name, physical_name] : column_mapping) + { + auto & column = info.format_header.getByName(column_name); + column.name = physical_name; + } + } + return info; + } + void updateConfigurationImpl(const ContextPtr & local_context) { const bool updated = base_configuration.update(local_context); - auto metadata = MetadataParser(base_configuration, local_context); - auto new_keys = metadata.getFiles(); - Storage::partition_columns = metadata.getPartitionColumns(); + metadata = std::make_unique(base_configuration, local_context); + auto new_keys = metadata->getFiles(); + Storage::partition_columns = metadata->getPartitionColumns(); if (!updated && new_keys == Storage::getConfiguration().keys) return; - auto read_configuration = getConfigurationForDataRead(metadata, base_configuration, local_context); + auto read_configuration = getConfigurationForDataRead(*metadata, base_configuration, local_context); Storage::useConfiguration(read_configuration); } Configuration base_configuration; std::mutex configuration_update_mutex; + std::unique_ptr metadata; LoggerPtr log; }; diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index 205b5d3381e..fc05c40c38f 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -1210,6 +1210,15 @@ bool StorageS3::parallelizeOutputAfterReading(ContextPtr context) const return FormatFactory::instance().checkParallelizeOutputAfterReading(configuration.format, context); } +ReadFromFormatInfo StorageS3::prepareReadingFromFormat( + const Strings & requested_columns, + const StorageSnapshotPtr & storage_snapshot, + bool supports_subset_of_columns, + ContextPtr /* local_context */) +{ + return DB::prepareReadingFromFormat(requested_columns, storage_snapshot, supports_subset_of_columns); +} + void StorageS3::read( QueryPlan & query_plan, const Names & column_names, @@ -1220,7 +1229,8 @@ void StorageS3::read( size_t max_block_size, size_t num_streams) { - auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(local_context)); + auto read_from_format_info = prepareReadingFromFormat( + column_names, storage_snapshot, supportsSubsetOfColumns(local_context), local_context); bool need_only_count = (query_info.optimize_trivial_count || read_from_format_info.requested_columns.empty()) && local_context->getSettingsRef().optimize_count_from_files; diff --git a/src/Storages/StorageS3.h b/src/Storages/StorageS3.h index 9e1d9eb5aad..535c3a97a68 100644 --- a/src/Storages/StorageS3.h +++ b/src/Storages/StorageS3.h @@ -395,6 +395,12 @@ private: bool prefersLargeBlocks() const override; bool parallelizeOutputAfterReading(ContextPtr context) const override; + + virtual ReadFromFormatInfo prepareReadingFromFormat( + const Strings & requested_columns, + const StorageSnapshotPtr & storage_snapshot, + bool 
supports_subset_of_columns, + ContextPtr local_context); }; } diff --git a/tests/integration/test_storage_delta/test.py b/tests/integration/test_storage_delta/test.py index c6bb8fd8d69..4cb71895881 100644 --- a/tests/integration/test_storage_delta/test.py +++ b/tests/integration/test_storage_delta/test.py @@ -153,7 +153,7 @@ def test_single_log_file(started_cluster): bucket = started_cluster.minio_bucket TABLE_NAME = "test_single_log_file" - inserted_data = "SELECT number, toString(number + 1) FROM numbers(100)" + inserted_data = "SELECT number as a, toString(number + 1) as b FROM numbers(100)" parquet_data_path = create_initial_data_file( started_cluster, instance, inserted_data, TABLE_NAME ) @@ -520,7 +520,7 @@ def test_partition_columns(started_cluster): bucket = started_cluster.minio_bucket TABLE_NAME = "test_partition_columns" result_file = f"{TABLE_NAME}" - partition_column = "c" + partition_columns = ["b", "c", "d", "e"] delta_table = ( DeltaTable.create(spark) @@ -529,7 +529,9 @@ def test_partition_columns(started_cluster): .addColumn("a", "INT") .addColumn("b", "STRING") .addColumn("c", "DATE") - .partitionedBy(partition_column) + .addColumn("d", "INT") + .addColumn("e", "BOOLEAN") + .partitionedBy(partition_columns) .execute() ) num_rows = 9 @@ -539,6 +541,8 @@ def test_partition_columns(started_cluster): StructField("a", IntegerType()), StructField("b", StringType()), StructField("c", DateType()), + StructField("d", IntegerType()), + StructField("e", BooleanType()), ] ) @@ -548,11 +552,13 @@ def test_partition_columns(started_cluster): i, "test" + str(i), datetime.strptime(f"2000-01-0{i}", "%Y-%m-%d"), + i, + False, ) ] df = spark.createDataFrame(data=data, schema=schema) df.printSchema() - df.write.mode("append").format("delta").partitionBy(partition_column).save( + df.write.mode("append").format("delta").partitionBy(partition_columns).save( f"/{TABLE_NAME}" ) @@ -569,7 +575,7 @@ def test_partition_columns(started_cluster): assert ( result - == "a\tNullable(Int32)\t\t\t\t\t\nb\tNullable(String)\t\t\t\t\t\nc\tNullable(Date)" + == "a\tNullable(Int32)\t\t\t\t\t\nb\tNullable(String)\t\t\t\t\t\nc\tNullable(Date32)\t\t\t\t\t\nd\tNullable(Int32)\t\t\t\t\t\ne\tNullable(Bool)" ) result = int( From 0a1daa00e0f8b57f73e64a7f9794ce1e7cef2c86 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Tue, 14 May 2024 14:40:38 +0000 Subject: [PATCH 027/140] fix functions with if --- .../Passes/RewriteAggregateFunctionWithIfPass.cpp | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/src/Analyzer/Passes/RewriteAggregateFunctionWithIfPass.cpp b/src/Analyzer/Passes/RewriteAggregateFunctionWithIfPass.cpp index 58045c935aa..45f3469b48e 100644 --- a/src/Analyzer/Passes/RewriteAggregateFunctionWithIfPass.cpp +++ b/src/Analyzer/Passes/RewriteAggregateFunctionWithIfPass.cpp @@ -71,7 +71,7 @@ public: new_arguments[1] = std::move(if_arguments_nodes[0]); function_arguments_nodes = std::move(new_arguments); - resolveAsAggregateFunctionWithIf(*function_node); + resolveAggregateFunctionNodeByName(*function_node, function_node->getFunctionName() + "If"); } } else if (first_const_node) @@ -100,18 +100,10 @@ public: new_arguments[1] = std::move(not_function); function_arguments_nodes = std::move(new_arguments); - resolveAsAggregateFunctionWithIf(*function_node); + resolveAggregateFunctionNodeByName(*function_node, function_node->getFunctionName() + "If"); } } } - -private: - static inline void resolveAsAggregateFunctionWithIf(FunctionNode & function_node) - { - auto result_type = 
function_node.getResultType(); - const auto * suffix = result_type->isNullable() ? "OrNullIf" : "If"; - resolveAggregateFunctionNodeByName(function_node, function_node.getFunctionName() + suffix); - } }; } From ccb4bd63700267169cbc30da68392ba6d8abb0d9 Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 6 Jun 2024 19:35:57 +0200 Subject: [PATCH 028/140] Resolve conflicts: apply implementation to StorageObjectStorage --- .../DataLakes/DeltaLakeMetadata.cpp | 311 ++++++++++++++++-- .../DataLakes/DeltaLakeMetadata.h | 12 +- .../ObjectStorage/DataLakes/HudiMetadata.h | 3 + .../DataLakes/IDataLakeMetadata.h | 3 + .../DataLakes/IStorageDataLake.h | 27 +- .../ObjectStorage/DataLakes/IcebergMetadata.h | 3 + .../ObjectStorage/StorageObjectStorage.cpp | 21 +- .../ObjectStorage/StorageObjectStorage.h | 8 + .../StorageObjectStorageSource.cpp | 31 +- .../StorageObjectStorageSource.h | 5 +- 10 files changed, 384 insertions(+), 40 deletions(-) diff --git a/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp b/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp index 38bf3112ee2..bd3e21f12fd 100644 --- a/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp +++ b/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp @@ -4,19 +4,41 @@ #include #if USE_AWS_S3 && USE_PARQUET -#include + +#include +#include +#include +#include + +#include +#include +#include + #include #include #include -#include -#include -#include -#include -#include -#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + #include +#include #include -#include +#include +#include +#include + +namespace fs = std::filesystem; namespace DB { @@ -25,6 +47,8 @@ namespace ErrorCodes { extern const int INCORRECT_DATA; extern const int BAD_ARGUMENTS; + extern const int LOGICAL_ERROR; + extern const int NOT_IMPLEMENTED; } struct DeltaLakeMetadata::Impl @@ -74,9 +98,17 @@ struct DeltaLakeMetadata::Impl * An action changes one aspect of the table's state, for example, adding or removing a file. * Note: it is not a valid json, but a list of json's, so we read it in a while cycle. 
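+     * For illustration, one line of the log carries a single action object, e.g. (abbreviated,
+     * field values here are made up):
+     *     {"add": {"path": "part-0001.parquet", "partitionValues": {"c": "2000-01-01"}, "size": 1024}}
+     *     {"metaData": {"schemaString": "{\"type\":\"struct\",\"fields\":[...]}", "partitionColumns": ["c"]}}
+     *     {"remove": {"path": "part-0000.parquet"}}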
*/ - std::set processMetadataFiles() + struct DeltaLakeMetadata + { + NamesAndTypesList schema; + Strings data_files; + DataLakePartitionColumns partition_columns; + }; + DeltaLakeMetadata processMetadataFiles() { std::set result_files; + NamesAndTypesList current_schema; + DataLakePartitionColumns current_partition_columns; const auto checkpoint_version = getCheckpointIfExists(result_files); if (checkpoint_version) @@ -90,7 +122,7 @@ struct DeltaLakeMetadata::Impl if (!object_storage->exists(StoredObject(file_path))) break; - processMetadataFile(file_path, result_files); + processMetadataFile(file_path, current_schema, current_partition_columns, result_files); } LOG_TRACE( @@ -101,10 +133,10 @@ struct DeltaLakeMetadata::Impl { const auto keys = listFiles(*object_storage, *configuration, deltalake_metadata_directory, metadata_file_suffix); for (const String & key : keys) - processMetadataFile(key, result_files); + processMetadataFile(key, current_schema, current_partition_columns, result_files); } - return result_files; + return DeltaLakeMetadata{current_schema, Strings(result_files.begin(), result_files.end()), current_partition_columns}; } /** @@ -136,7 +168,11 @@ struct DeltaLakeMetadata::Impl * \"nullCount\":{\"col-6c990940-59bb-4709-8f2e-17083a82c01a\":0,\"col-763cd7e2-7627-4d8e-9fb7-9e85d0c8845b\":0}}"}} * " */ - void processMetadataFile(const String & key, std::set & result) const + void processMetadataFile( + const String & key, + NamesAndTypesList & file_schema, + DataLakePartitionColumns & file_partition_columns, + std::set & result) { auto read_settings = context->getReadSettings(); auto buf = object_storage->readObject(StoredObject(key), read_settings); @@ -157,20 +193,236 @@ struct DeltaLakeMetadata::Impl if (json_str.empty()) continue; - const JSON json(json_str); - if (json.has("add")) + Poco::JSON::Parser parser; + Poco::Dynamic::Var json = parser.parse(json_str); + Poco::JSON::Object::Ptr object = json.extract(); + + std::ostringstream oss; // STYLE_CHECK_ALLOW_STD_STRING_STREAM + object->stringify(oss); + LOG_TEST(log, "Metadata: {}", oss.str()); + + if (object->has("add")) { - const auto path = json["add"]["path"].getString(); - result.insert(std::filesystem::path(configuration->getPath()) / path); + auto add_object = object->get("add").extract(); + auto path = add_object->getValue("path"); + result.insert(fs::path(configuration->getPath()) / path); + + auto filename = fs::path(path).filename().string(); + auto it = file_partition_columns.find(filename); + if (it == file_partition_columns.end()) + { + auto partition_values = add_object->get("partitionValues").extract(); + if (partition_values->size()) + { + auto & current_partition_columns = file_partition_columns[filename]; + for (const auto & partition_name : partition_values->getNames()) + { + const auto value = partition_values->getValue(partition_name); + auto name_and_type = file_schema.tryGetByName(partition_name); + if (!name_and_type) + throw Exception(ErrorCodes::LOGICAL_ERROR, "No such column in schema: {}", partition_name); + + auto field = getFieldValue(value, name_and_type->type); + current_partition_columns.emplace_back(*name_and_type, field); + + LOG_TEST(log, "Partition {} value is {} (for {})", partition_name, value, filename); + } + } + } } - else if (json.has("remove")) + else if (object->has("remove")) { - const auto path = json["remove"]["path"].getString(); - result.erase(std::filesystem::path(configuration->getPath()) / path); + auto path = object->get("remove").extract()->getValue("path"); + 
result.erase(fs::path(configuration->getPath()) / path); + } + if (object->has("metaData")) + { + const auto metadata_object = object->get("metaData").extract(); + const auto schema_object = metadata_object->getValue("schemaString"); + + Poco::JSON::Parser p; + Poco::Dynamic::Var fields_json = parser.parse(schema_object); + Poco::JSON::Object::Ptr fields_object = fields_json.extract(); + + const auto fields = fields_object->get("fields").extract(); + NamesAndTypesList current_schema; + for (size_t i = 0; i < fields->size(); ++i) + { + const auto field = fields->getObject(static_cast(i)); + auto column_name = field->getValue("name"); + auto type = field->getValue("type"); + auto is_nullable = field->getValue("nullable"); + + std::string physical_name; + auto schema_metadata_object = field->get("metadata").extract(); + if (schema_metadata_object->has("delta.columnMapping.physicalName")) + physical_name = schema_metadata_object->getValue("delta.columnMapping.physicalName"); + else + physical_name = column_name; + + LOG_TEST(log, "Found column: {}, type: {}, nullable: {}, physical name: {}", + column_name, type, is_nullable, physical_name); + + current_schema.push_back({physical_name, getFieldType(field, "type", is_nullable)}); + } + + if (file_schema.empty()) + { + file_schema = current_schema; + } + else if (file_schema != current_schema) + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, + "Reading from files with different schema is not possible " + "({} is different from {})", + file_schema.toString(), current_schema.toString()); + } } } } + DataTypePtr getFieldType(const Poco::JSON::Object::Ptr & field, const String & type_key, bool is_nullable) + { + if (field->isObject(type_key)) + return getComplexTypeFromObject(field->getObject(type_key)); + + auto type = field->get(type_key); + if (type.isString()) + { + const String & type_name = type.extract(); + auto data_type = getSimpleTypeByName(type_name); + return is_nullable ? 
makeNullable(data_type) : data_type; + } + + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unexpected 'type' field: {}", type.toString()); + } + + Field getFieldValue(const String & value, DataTypePtr data_type) + { + DataTypePtr check_type; + if (data_type->isNullable()) + check_type = static_cast(data_type.get())->getNestedType(); + else + check_type = data_type; + + WhichDataType which(check_type->getTypeId()); + if (which.isStringOrFixedString()) + return value; + else if (which.isInt8()) + return parse(value); + else if (which.isUInt8()) + return parse(value); + else if (which.isInt16()) + return parse(value); + else if (which.isUInt16()) + return parse(value); + else if (which.isInt32()) + return parse(value); + else if (which.isUInt32()) + return parse(value); + else if (which.isInt64()) + return parse(value); + else if (which.isUInt64()) + return parse(value); + else if (which.isFloat32()) + return parse(value); + else if (which.isFloat64()) + return parse(value); + else if (which.isDate()) + return UInt16{LocalDate{std::string(value)}.getDayNum()}; + else if (which.isDate32()) + return Int32{LocalDate{std::string(value)}.getExtenedDayNum()}; + else if (which.isDateTime64()) + { + ReadBufferFromString in(value); + DateTime64 time = 0; + readDateTime64Text(time, 6, in, assert_cast(data_type.get())->getTimeZone()); + return time; + } + + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unsupported DeltaLake type for {}", check_type->getColumnType()); + } + + DataTypePtr getSimpleTypeByName(const String & type_name) + { + /// https://github.com/delta-io/delta/blob/master/PROTOCOL.md#primitive-types + + if (type_name == "string" || type_name == "binary") + return std::make_shared(); + if (type_name == "long") + return std::make_shared(); + if (type_name == "integer") + return std::make_shared(); + if (type_name == "short") + return std::make_shared(); + if (type_name == "byte") + return std::make_shared(); + if (type_name == "float") + return std::make_shared(); + if (type_name == "double") + return std::make_shared(); + if (type_name == "boolean") + return DataTypeFactory::instance().get("Bool"); + if (type_name == "date") + return std::make_shared(); + if (type_name == "timestamp") + return std::make_shared(6); + if (type_name.starts_with("decimal(") && type_name.ends_with(')')) + { + ReadBufferFromString buf(std::string_view(type_name.begin() + 8, type_name.end() - 1)); + size_t precision; + size_t scale; + readIntText(precision, buf); + skipWhitespaceIfAny(buf); + assertChar(',', buf); + skipWhitespaceIfAny(buf); + tryReadIntText(scale, buf); + return createDecimal(precision, scale); + } + + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unsupported DeltaLake type: {}", type_name); + } + + DataTypePtr getComplexTypeFromObject(const Poco::JSON::Object::Ptr & type) + { + String type_name = type->getValue("type"); + + if (type_name == "struct") + { + DataTypes element_types; + Names element_names; + auto fields = type->get("fields").extract(); + element_types.reserve(fields->size()); + element_names.reserve(fields->size()); + for (size_t i = 0; i != fields->size(); ++i) + { + auto field = fields->getObject(static_cast(i)); + element_names.push_back(field->getValue("name")); + auto required = field->getValue("required"); + element_types.push_back(getFieldType(field, "type", required)); + } + + return std::make_shared(element_types, element_names); + } + + if (type_name == "array") + { + bool is_nullable = type->getValue("containsNull"); + auto element_type = getFieldType(type, "elementType", 
is_nullable); + return std::make_shared(element_type); + } + + if (type_name == "map") + { + bool is_nullable = type->getValue("containsNull"); + auto key_type = getFieldType(type, "keyType", /* is_nullable */false); + auto value_type = getFieldType(type, "valueType", is_nullable); + return std::make_shared(key_type, value_type); + } + + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unsupported DeltaLake type: {}", type_name); + } + /** * Checkpoints in delta-lake are created each 10 commits by default. * Latest checkpoint is written in _last_checkpoint file: _delta_log/_last_checkpoint @@ -277,8 +529,8 @@ struct DeltaLakeMetadata::Impl ArrowMemoryPool::instance(), &reader)); - std::shared_ptr schema; - THROW_ARROW_NOT_OK(reader->GetSchema(&schema)); + std::shared_ptr file_schema; + THROW_ARROW_NOT_OK(reader->GetSchema(&file_schema)); ArrowColumnToCHColumn column_reader( header, "Parquet", @@ -327,16 +579,13 @@ DeltaLakeMetadata::DeltaLakeMetadata( ContextPtr context_) : impl(std::make_unique(object_storage_, configuration_, context_)) { -} - -Strings DeltaLakeMetadata::getDataFiles() const -{ - if (!data_files.empty()) - return data_files; - auto result = impl->processMetadataFiles(); - data_files = Strings(result.begin(), result.end()); - return data_files; + data_files = result.data_files; + schema = result.schema; + partition_columns = result.partition_columns; + + LOG_TRACE(impl->log, "Found {} data files, {} partition files, schema: {}", + data_files.size(), partition_columns.size(), schema.toString()); } } diff --git a/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.h b/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.h index e527721b29e..da9fa1e76ce 100644 --- a/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.h +++ b/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.h @@ -20,9 +20,13 @@ public: ConfigurationPtr configuration_, ContextPtr context_); - Strings getDataFiles() const override; + Strings getDataFiles() const override { return data_files; } - NamesAndTypesList getTableSchema() const override { return {}; } + NamesAndTypesList getTableSchema() const override { return schema; } + + DataLakePartitionColumns getPartitionColumns() const override { return partition_columns; } + + const std::unordered_map & getColumnNameToPhysicalNameMapping() const override { return column_name_to_physical_name; } bool operator ==(const IDataLakeMetadata & other) const override { @@ -44,6 +48,10 @@ private: struct Impl; const std::shared_ptr impl; mutable Strings data_files; + + NamesAndTypesList schema; + std::unordered_map column_name_to_physical_name; + DataLakePartitionColumns partition_columns; }; } diff --git a/src/Storages/ObjectStorage/DataLakes/HudiMetadata.h b/src/Storages/ObjectStorage/DataLakes/HudiMetadata.h index 3ab274b1fbf..ac978732804 100644 --- a/src/Storages/ObjectStorage/DataLakes/HudiMetadata.h +++ b/src/Storages/ObjectStorage/DataLakes/HudiMetadata.h @@ -26,6 +26,8 @@ public: NamesAndTypesList getTableSchema() const override { return {}; } + const std::unordered_map & getColumnNameToPhysicalNameMapping() const override { return column_name_to_physical_name; } + bool operator ==(const IDataLakeMetadata & other) const override { const auto * hudi_metadata = dynamic_cast(&other); @@ -46,6 +48,7 @@ private: const ObjectStoragePtr object_storage; const ConfigurationPtr configuration; mutable Strings data_files; + std::unordered_map column_name_to_physical_name; Strings getDataFilesImpl() const; }; diff --git 
a/src/Storages/ObjectStorage/DataLakes/IDataLakeMetadata.h b/src/Storages/ObjectStorage/DataLakes/IDataLakeMetadata.h index a2bd5adb947..53b8abf7a5c 100644 --- a/src/Storages/ObjectStorage/DataLakes/IDataLakeMetadata.h +++ b/src/Storages/ObjectStorage/DataLakes/IDataLakeMetadata.h @@ -2,6 +2,7 @@ #include #include #include +#include "PartitionColumns.h" namespace DB { @@ -13,6 +14,8 @@ public: virtual Strings getDataFiles() const = 0; virtual NamesAndTypesList getTableSchema() const = 0; virtual bool operator==(const IDataLakeMetadata & other) const = 0; + virtual DataLakePartitionColumns getPartitionColumns() const { return {}; } + virtual const std::unordered_map & getColumnNameToPhysicalNameMapping() const = 0; }; using DataLakeMetadataPtr = std::unique_ptr; diff --git a/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h b/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h index 83865c47eb8..64711d1774c 100644 --- a/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h +++ b/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h @@ -81,7 +81,7 @@ public: auto metadata = DataLakeMetadata::create(object_storage_, base_configuration, local_context); auto schema_from_metadata = metadata->getTableSchema(); - if (schema_from_metadata != NamesAndTypesList{}) + if (!schema_from_metadata.empty()) { return ColumnsDescription(std::move(schema_from_metadata)); } @@ -99,6 +99,7 @@ public: Storage::updateConfiguration(local_context); auto new_metadata = DataLakeMetadata::create(Storage::object_storage, base_configuration, local_context); + Storage::partition_columns = new_metadata->getPartitionColumns(); if (current_metadata && *current_metadata == *new_metadata) return; @@ -128,6 +129,30 @@ public: private: ConfigurationPtr base_configuration; DataLakeMetadataPtr current_metadata; + + ReadFromFormatInfo prepareReadingFromFormat( + const Strings & requested_columns, + const StorageSnapshotPtr & storage_snapshot, + bool supports_subset_of_columns, + ContextPtr local_context) override + { + auto info = DB::prepareReadingFromFormat(requested_columns, storage_snapshot, supports_subset_of_columns); + if (!current_metadata) + { + Storage::updateConfiguration(local_context); + current_metadata = DataLakeMetadata::create(Storage::object_storage, base_configuration, local_context); + } + auto column_mapping = current_metadata->getColumnNameToPhysicalNameMapping(); + if (!column_mapping.empty()) + { + for (const auto & [column_name, physical_name] : column_mapping) + { + auto & column = info.format_header.getByName(column_name); + column.name = physical_name; + } + } + return info; + } }; using StorageIceberg = IStorageDataLake; diff --git a/src/Storages/ObjectStorage/DataLakes/IcebergMetadata.h b/src/Storages/ObjectStorage/DataLakes/IcebergMetadata.h index 06dbd373bf9..39673a03cb1 100644 --- a/src/Storages/ObjectStorage/DataLakes/IcebergMetadata.h +++ b/src/Storages/ObjectStorage/DataLakes/IcebergMetadata.h @@ -82,6 +82,8 @@ public: /// Get table schema parsed from metadata. 
NamesAndTypesList getTableSchema() const override { return schema; } + const std::unordered_map & getColumnNameToPhysicalNameMapping() const override { return column_name_to_physical_name; } + bool operator ==(const IDataLakeMetadata & other) const override { const auto * iceberg_metadata = dynamic_cast(&other); @@ -104,6 +106,7 @@ private: Int32 current_schema_id; NamesAndTypesList schema; mutable Strings data_files; + std::unordered_map column_name_to_physical_name; LoggerPtr log; }; diff --git a/src/Storages/ObjectStorage/StorageObjectStorage.cpp b/src/Storages/ObjectStorage/StorageObjectStorage.cpp index 2c8e60b49d0..14bbad659f1 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorage.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorage.cpp @@ -111,7 +111,8 @@ public: const bool need_only_count_, ContextPtr context_, size_t max_block_size_, - size_t num_streams_) + size_t num_streams_, + const DataLakePartitionColumns & partition_columns_) : SourceStepWithFilter(DataStream{.header = info_.source_header}, columns_to_read, query_info_, storage_snapshot_, context_) , object_storage(object_storage_) , configuration(configuration_) @@ -123,6 +124,7 @@ public: , max_block_size(max_block_size_) , num_streams(num_streams_) , distributed_processing(distributed_processing_) + , partition_columns(partition_columns_) { } @@ -161,7 +163,7 @@ public: { auto source = std::make_shared( getName(), object_storage, configuration, info, format_settings, - context, max_block_size, iterator_wrapper, max_parsing_threads, need_only_count); + context, max_block_size, iterator_wrapper, max_parsing_threads, need_only_count, partition_columns); source->setKeyCondition(filter_actions_dag, context); pipes.emplace_back(std::move(source)); @@ -190,6 +192,7 @@ private: const size_t max_block_size; size_t num_streams; const bool distributed_processing; + DataLakePartitionColumns partition_columns; void createIterator(const ActionsDAG::Node * predicate) { @@ -203,6 +206,15 @@ private: }; } +ReadFromFormatInfo StorageObjectStorage::prepareReadingFromFormat( + const Strings & requested_columns, + const StorageSnapshotPtr & storage_snapshot, + bool supports_subset_of_columns, + ContextPtr /* local_context */) +{ + return DB::prepareReadingFromFormat(requested_columns, storage_snapshot, supports_subset_of_columns); +} + void StorageObjectStorage::read( QueryPlan & query_plan, const Names & column_names, @@ -222,7 +234,7 @@ void StorageObjectStorage::read( } const auto read_from_format_info = prepareReadingFromFormat( - column_names, storage_snapshot, supportsSubsetOfColumns(local_context)); + column_names, storage_snapshot, supportsSubsetOfColumns(local_context), local_context); const bool need_only_count = (query_info.optimize_trivial_count || read_from_format_info.requested_columns.empty()) && local_context->getSettingsRef().optimize_count_from_files; @@ -240,7 +252,8 @@ void StorageObjectStorage::read( need_only_count, local_context, max_block_size, - num_streams); + num_streams, + partition_columns); query_plan.addStep(std::move(read_step)); } diff --git a/src/Storages/ObjectStorage/StorageObjectStorage.h b/src/Storages/ObjectStorage/StorageObjectStorage.h index f45d8c1f01a..645f97201ec 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorage.h +++ b/src/Storages/ObjectStorage/StorageObjectStorage.h @@ -5,6 +5,7 @@ #include #include #include +#include namespace DB { @@ -117,6 +118,12 @@ public: protected: virtual void updateConfiguration(ContextPtr local_context); + virtual ReadFromFormatInfo 
prepareReadingFromFormat( + const Strings & requested_columns, + const StorageSnapshotPtr & storage_snapshot, + bool supports_subset_of_columns, + ContextPtr local_context); + static std::unique_ptr createReadBufferIterator( const ObjectStoragePtr & object_storage, const ConfigurationPtr & configuration, @@ -129,6 +136,7 @@ protected: const std::optional format_settings; const ASTPtr partition_by; const bool distributed_processing; + mutable DataLakePartitionColumns partition_columns; LoggerPtr log; }; diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp index b31d0f8a92e..97c1ecc38b9 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp @@ -49,7 +49,8 @@ StorageObjectStorageSource::StorageObjectStorageSource( UInt64 max_block_size_, std::shared_ptr file_iterator_, size_t max_parsing_threads_, - bool need_only_count_) + bool need_only_count_, + const DataLakePartitionColumns & partition_columns_) : SourceWithKeyCondition(info.source_header, false) , WithContext(context_) , name(std::move(name_)) @@ -68,6 +69,7 @@ StorageObjectStorageSource::StorageObjectStorageSource( , columns_desc(info.columns_description) , file_iterator(file_iterator_) , schema_cache(StorageObjectStorage::getSchemaCache(context_, configuration->getTypeName())) + , partition_columns(partition_columns_) , create_reader_scheduler(threadPoolCallbackRunnerUnsafe(*create_reader_pool, "Reader")) { } @@ -201,6 +203,33 @@ Chunk StorageObjectStorageSource::generate() getUniqueStoragePathIdentifier(*configuration, reader.getObjectInfo(), false), object_info.metadata->size_bytes, &filename); + if (!partition_columns.empty() && chunk_size && chunk.hasColumns()) + { + auto partition_values = partition_columns.find(filename); + + for (const auto & [name_and_type, value] : partition_values->second) + { + if (!read_from_format_info.source_header.has(name_and_type.name)) + continue; + + auto column_pos = read_from_format_info.source_header.getPositionByName(name_and_type.name); + + const auto & type = name_and_type.type; + auto partition_column = type->createColumnConst(chunk.getNumRows(), value)->convertToFullColumnIfConst(); + /// This column is filled with default value now, remove it. + chunk.erase(column_pos); + /// Add correct values. 
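+                    /// Partition values are not stored in the data files themselves, they come from
+                    /// the table metadata, so the default-filled column is replaced with a constant
+                    /// column materialized from the recorded value.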
+ if (chunk.hasColumns()) + { + chunk.addColumn(column_pos, std::move(partition_column)); + } + else + { + chunk.addColumn(std::move(partition_column)); + } + } + } + return chunk; } diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.h b/src/Storages/ObjectStorage/StorageObjectStorageSource.h index fd7c7aa7102..ab8d9588155 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.h @@ -6,6 +6,7 @@ #include #include #include +#include namespace DB @@ -39,7 +40,8 @@ public: UInt64 max_block_size_, std::shared_ptr file_iterator_, size_t max_parsing_threads_, - bool need_only_count_); + bool need_only_count_, + const DataLakePartitionColumns & partition_columns_ = {}); ~StorageObjectStorageSource() override; @@ -81,6 +83,7 @@ protected: bool initialized = false; size_t total_rows_in_file = 0; LoggerPtr log = getLogger("StorageObjectStorageSource"); + DataLakePartitionColumns partition_columns; struct ReaderHolder : private boost::noncopyable { From 1c1628db0f00d25c97e552e1b63d2f2a5cae9ee0 Mon Sep 17 00:00:00 2001 From: kssenii Date: Fri, 7 Jun 2024 13:43:26 +0200 Subject: [PATCH 029/140] Minor --- .../ObjectStorage/DataLakes/DeltaLakeMetadata.h | 2 +- src/Storages/ObjectStorage/DataLakes/HudiMetadata.h | 3 +++ .../ObjectStorage/DataLakes/IDataLakeMetadata.h | 2 +- .../ObjectStorage/DataLakes/IStorageDataLake.h | 5 ++++- src/Storages/ObjectStorage/DataLakes/IcebergMetadata.h | 3 +++ .../ObjectStorage/DataLakes/PartitionColumns.h | 2 ++ .../ObjectStorage/StorageObjectStorageSource.cpp | 10 +++------- src/TableFunctions/ITableFunctionDataLake.h | 2 -- 8 files changed, 17 insertions(+), 12 deletions(-) diff --git a/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.h b/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.h index da9fa1e76ce..926bd1b451d 100644 --- a/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.h +++ b/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.h @@ -24,7 +24,7 @@ public: NamesAndTypesList getTableSchema() const override { return schema; } - DataLakePartitionColumns getPartitionColumns() const override { return partition_columns; } + const DataLakePartitionColumns & getPartitionColumns() const override { return partition_columns; } const std::unordered_map & getColumnNameToPhysicalNameMapping() const override { return column_name_to_physical_name; } diff --git a/src/Storages/ObjectStorage/DataLakes/HudiMetadata.h b/src/Storages/ObjectStorage/DataLakes/HudiMetadata.h index ac978732804..b060b1b0d39 100644 --- a/src/Storages/ObjectStorage/DataLakes/HudiMetadata.h +++ b/src/Storages/ObjectStorage/DataLakes/HudiMetadata.h @@ -26,6 +26,8 @@ public: NamesAndTypesList getTableSchema() const override { return {}; } + const DataLakePartitionColumns & getPartitionColumns() const override { return partition_columns; } + const std::unordered_map & getColumnNameToPhysicalNameMapping() const override { return column_name_to_physical_name; } bool operator ==(const IDataLakeMetadata & other) const override @@ -49,6 +51,7 @@ private: const ConfigurationPtr configuration; mutable Strings data_files; std::unordered_map column_name_to_physical_name; + DataLakePartitionColumns partition_columns; Strings getDataFilesImpl() const; }; diff --git a/src/Storages/ObjectStorage/DataLakes/IDataLakeMetadata.h b/src/Storages/ObjectStorage/DataLakes/IDataLakeMetadata.h index 53b8abf7a5c..2954d50db91 100644 --- a/src/Storages/ObjectStorage/DataLakes/IDataLakeMetadata.h +++ 
b/src/Storages/ObjectStorage/DataLakes/IDataLakeMetadata.h @@ -14,7 +14,7 @@ public: virtual Strings getDataFiles() const = 0; virtual NamesAndTypesList getTableSchema() const = 0; virtual bool operator==(const IDataLakeMetadata & other) const = 0; - virtual DataLakePartitionColumns getPartitionColumns() const { return {}; } + virtual const DataLakePartitionColumns & getPartitionColumns() const = 0; virtual const std::unordered_map & getColumnNameToPhysicalNameMapping() const = 0; }; using DataLakeMetadataPtr = std::unique_ptr; diff --git a/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h b/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h index 64711d1774c..97fb9890490 100644 --- a/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h +++ b/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h @@ -99,7 +99,10 @@ public: Storage::updateConfiguration(local_context); auto new_metadata = DataLakeMetadata::create(Storage::object_storage, base_configuration, local_context); - Storage::partition_columns = new_metadata->getPartitionColumns(); + auto partition_columns = new_metadata->getPartitionColumns(); + + if (partition_columns != Storage::partition_columns) + Storage::partition_columns = partition_columns; if (current_metadata && *current_metadata == *new_metadata) return; diff --git a/src/Storages/ObjectStorage/DataLakes/IcebergMetadata.h b/src/Storages/ObjectStorage/DataLakes/IcebergMetadata.h index 39673a03cb1..9476ac6e7d9 100644 --- a/src/Storages/ObjectStorage/DataLakes/IcebergMetadata.h +++ b/src/Storages/ObjectStorage/DataLakes/IcebergMetadata.h @@ -84,6 +84,8 @@ public: const std::unordered_map & getColumnNameToPhysicalNameMapping() const override { return column_name_to_physical_name; } + const DataLakePartitionColumns & getPartitionColumns() const override { return partition_columns; } + bool operator ==(const IDataLakeMetadata & other) const override { const auto * iceberg_metadata = dynamic_cast(&other); @@ -107,6 +109,7 @@ private: NamesAndTypesList schema; mutable Strings data_files; std::unordered_map column_name_to_physical_name; + DataLakePartitionColumns partition_columns; LoggerPtr log; }; diff --git a/src/Storages/ObjectStorage/DataLakes/PartitionColumns.h b/src/Storages/ObjectStorage/DataLakes/PartitionColumns.h index 604dbbf78fa..eb605559145 100644 --- a/src/Storages/ObjectStorage/DataLakes/PartitionColumns.h +++ b/src/Storages/ObjectStorage/DataLakes/PartitionColumns.h @@ -9,6 +9,8 @@ struct DataLakePartitionColumn { NameAndTypePair name_and_type; Field value; + + bool operator ==(const DataLakePartitionColumn & other) const = default; }; /// Data file -> partition columns diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp index 97c1ecc38b9..3ae51cd4235 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp @@ -212,21 +212,17 @@ Chunk StorageObjectStorageSource::generate() if (!read_from_format_info.source_header.has(name_and_type.name)) continue; - auto column_pos = read_from_format_info.source_header.getPositionByName(name_and_type.name); + const auto column_pos = read_from_format_info.source_header.getPositionByName(name_and_type.name); + auto partition_column = name_and_type.type->createColumnConst(chunk.getNumRows(), value)->convertToFullColumnIfConst(); - const auto & type = name_and_type.type; - auto partition_column = type->createColumnConst(chunk.getNumRows(), 
value)->convertToFullColumnIfConst(); /// This column is filled with default value now, remove it. chunk.erase(column_pos); + /// Add correct values. if (chunk.hasColumns()) - { chunk.addColumn(column_pos, std::move(partition_column)); - } else - { chunk.addColumn(std::move(partition_column)); - } } } diff --git a/src/TableFunctions/ITableFunctionDataLake.h b/src/TableFunctions/ITableFunctionDataLake.h index f7915643a08..fe6e5b3e593 100644 --- a/src/TableFunctions/ITableFunctionDataLake.h +++ b/src/TableFunctions/ITableFunctionDataLake.h @@ -69,8 +69,6 @@ protected: /// Set default format to Parquet if it's not specified in arguments. TableFunction::parseArguments(ast_function, context); } - - ColumnsDescription structure_hint; }; struct TableFunctionIcebergName From dd28c052671651dd0891ad4c8a0a43f9842a6ce3 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Wed, 12 Jun 2024 20:20:39 +0000 Subject: [PATCH 030/140] fix some cases --- .../Passes/FunctionToSubcolumnsPass.cpp | 17 +++++ .../array/FunctionsMapMiscellaneous.cpp | 6 +- ...functions_to_subcolumns_analyzer.reference | 72 +++++++++---------- ...71_function_to_subcolumns_fuzzer.reference | 3 + .../03171_function_to_subcolumns_fuzzer.sql | 39 ++++++++++ 5 files changed, 97 insertions(+), 40 deletions(-) create mode 100644 tests/queries/0_stateless/03171_function_to_subcolumns_fuzzer.reference create mode 100644 tests/queries/0_stateless/03171_function_to_subcolumns_fuzzer.sql diff --git a/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp b/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp index 030feac65dc..9cfd22cbef5 100644 --- a/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp +++ b/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp @@ -19,6 +19,7 @@ #include #include #include +#include namespace DB { @@ -269,10 +270,24 @@ public: enterImpl(*function_node, *first_argument_node, *table_node); return; } + + if (const auto * join_node = node->as()) + { + has_join_use_nulls |= getContext()->getSettingsRef().join_use_nulls; + return; + } } std::unordered_set getIdentifiersToOptimize() const { + if (has_join_use_nulls) + { + /// Do not optimize if we have JOIN with setting join_use_null. + /// It may change the behaviour if subcolumn can be coverted + /// to nullable while the original column cannot. + return {}; + } + /// Do not optimize if full column is requested in other context. /// It doesn't make sense because it doesn't reduce amount of read data /// and optimized functions are not computation heavy. 
But introducing @@ -306,7 +321,9 @@ private: std::unordered_set all_key_columns; std::unordered_map identifiers_count; std::unordered_map optimized_identifiers_count; + NameSet processed_tables; + bool has_join_use_nulls = false; void enterImpl(const TableNode & table_node) { diff --git a/src/Functions/array/FunctionsMapMiscellaneous.cpp b/src/Functions/array/FunctionsMapMiscellaneous.cpp index 76c1ec18171..c3586a57161 100644 --- a/src/Functions/array/FunctionsMapMiscellaneous.cpp +++ b/src/Functions/array/FunctionsMapMiscellaneous.cpp @@ -51,6 +51,8 @@ public: bool isVariadic() const override { return impl.isVariadic(); } size_t getNumberOfArguments() const override { return impl.getNumberOfArguments(); } + bool useDefaultImplementationForNulls() const override { return impl.useDefaultImplementationForNulls(); } + bool useDefaultImplementationForLowCardinalityColumns() const override { return impl.useDefaultImplementationForLowCardinalityColumns(); } bool useDefaultImplementationForConstants() const override { return impl.useDefaultImplementationForConstants(); } bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo &) const override { return false; } @@ -184,7 +186,7 @@ struct MapToNestedAdapter : public MapAdapterBase struct MapToSubcolumnAdapter { - static_assert(position <= 1); + static_assert(position <= 1, "position of Map subcolumn must be 0 or 1"); static void extractNestedTypes(DataTypes & types) { @@ -357,7 +359,7 @@ struct NameMapValues { static constexpr auto name = "mapValues"; }; using FunctionMapValues = FunctionMapToArrayAdapter, NameMapValues>; struct NameMapContains { static constexpr auto name = "mapContains"; }; -using FunctionMapContains = FunctionMapToArrayAdapter, MapToSubcolumnAdapter, NameMapContains>; +using FunctionMapContains = FunctionMapToArrayAdapter, MapToSubcolumnAdapter, NameMapContains>; struct NameMapFilter { static constexpr auto name = "mapFilter"; }; using FunctionMapFilter = FunctionMapToArrayAdapter, NameMapFilter>; diff --git a/tests/queries/0_stateless/01872_functions_to_subcolumns_analyzer.reference b/tests/queries/0_stateless/01872_functions_to_subcolumns_analyzer.reference index e409e9ad89f..32bacfba5ea 100644 --- a/tests/queries/0_stateless/01872_functions_to_subcolumns_analyzer.reference +++ b/tests/queries/0_stateless/01872_functions_to_subcolumns_analyzer.reference @@ -7,20 +7,22 @@ QUERY id: 0 isNotNull(n) UInt8 PROJECTION LIST id: 1, nodes: 3 - FUNCTION id: 2, function_name: isNull, function_type: ordinary, result_type: UInt8 + CONSTANT id: 2, constant_value: UInt64_0, constant_value_type: UInt8 + EXPRESSION + FUNCTION id: 3, function_name: isNull, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 4, nodes: 1 + COLUMN id: 5, column_name: id, result_type: UInt64, source_id: 6 + COLUMN id: 7, column_name: n.null, result_type: UInt8, source_id: 6 + FUNCTION id: 8, function_name: not, function_type: ordinary, result_type: UInt8 ARGUMENTS - LIST id: 3, nodes: 1 - COLUMN id: 4, column_name: id, result_type: UInt64, source_id: 5 - COLUMN id: 6, column_name: n.null, result_type: UInt8, source_id: 5 - FUNCTION id: 7, function_name: not, function_type: ordinary, result_type: UInt8 - ARGUMENTS - LIST id: 8, nodes: 1 - COLUMN id: 9, column_name: n.null, result_type: UInt8, source_id: 5 + LIST id: 9, nodes: 1 + COLUMN id: 10, column_name: n.null, result_type: UInt8, source_id: 6 JOIN TREE - TABLE id: 5, alias: __table1, table_name: default.t_func_to_subcolumns + TABLE id: 6, alias: __table1, table_name: 
default.t_func_to_subcolumns SELECT - __table1.id IS NULL AS `isNull(id)`, + _CAST(0, \'UInt8\') AS `isNull(id)`, __table1.`n.null` AS `isNull(n)`, NOT __table1.`n.null` AS `isNotNull(n)` FROM default.t_func_to_subcolumns AS __table1 @@ -120,64 +122,58 @@ QUERY id: 0 LIST id: 1, nodes: 3 COLUMN id: 2, column_name: id, result_type: UInt64, source_id: 3 COLUMN id: 4, column_name: n.null, result_type: UInt8, source_id: 3 - FUNCTION id: 5, function_name: isNull, function_type: ordinary, result_type: UInt8 - ARGUMENTS - LIST id: 6, nodes: 1 - COLUMN id: 7, column_name: n, result_type: String, source_id: 8 + CONSTANT id: 5, constant_value: UInt64_0, constant_value_type: UInt8 + EXPRESSION + FUNCTION id: 6, function_name: isNull, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 7, nodes: 1 + COLUMN id: 8, column_name: n, result_type: String, source_id: 9 JOIN TREE - JOIN id: 9, strictness: ALL, kind: FULL + JOIN id: 10, strictness: ALL, kind: FULL LEFT TABLE EXPRESSION TABLE id: 3, alias: __table1, table_name: default.t_func_to_subcolumns RIGHT TABLE EXPRESSION - UNION id: 8, alias: __table2, is_subquery: 1, union_mode: UNION_ALL + UNION id: 9, alias: __table2, is_subquery: 1, union_mode: UNION_ALL QUERIES - LIST id: 10, nodes: 2 - QUERY id: 11, alias: __table3 + LIST id: 11, nodes: 2 + QUERY id: 12, alias: __table3 PROJECTION COLUMNS id UInt8 - n String PROJECTION - LIST id: 12, nodes: 2 - CONSTANT id: 13, constant_value: UInt64_1, constant_value_type: UInt8 - CONSTANT id: 14, constant_value: \'qqq\', constant_value_type: String + LIST id: 13, nodes: 1 + CONSTANT id: 14, constant_value: UInt64_1, constant_value_type: UInt8 JOIN TREE TABLE id: 15, alias: __table4, table_name: system.one QUERY id: 16, alias: __table5 PROJECTION COLUMNS id UInt8 - \'www\' String PROJECTION - LIST id: 17, nodes: 2 + LIST id: 17, nodes: 1 CONSTANT id: 18, constant_value: UInt64_3, constant_value_type: UInt8 - CONSTANT id: 19, constant_value: \'www\', constant_value_type: String JOIN TREE - TABLE id: 20, alias: __table6, table_name: system.one + TABLE id: 19, alias: __table6, table_name: system.one JOIN EXPRESSION - LIST id: 21, nodes: 1 - COLUMN id: 22, column_name: id, result_type: UInt64, source_id: 9 + LIST id: 20, nodes: 1 + COLUMN id: 21, column_name: id, result_type: UInt64, source_id: 10 EXPRESSION - LIST id: 23, nodes: 2 - COLUMN id: 24, column_name: id, result_type: UInt64, source_id: 3 - COLUMN id: 25, column_name: id, result_type: UInt8, source_id: 8 + LIST id: 22, nodes: 2 + COLUMN id: 23, column_name: id, result_type: UInt64, source_id: 3 + COLUMN id: 24, column_name: id, result_type: UInt8, source_id: 9 SELECT __table1.id AS id, __table1.`n.null` AS `isNull(n)`, - __table2.n IS NULL AS `isNull(right.n)` + _CAST(0, \'UInt8\') AS `isNull(right.n)` FROM default.t_func_to_subcolumns AS __table1 ALL FULL OUTER JOIN ( ( - SELECT - 1 AS id, - \'qqq\' AS n + SELECT 1 AS id FROM system.one AS __table4 ) UNION ALL ( - SELECT - 3 AS id, - \'www\' AS `\'www\'` + SELECT 3 AS id FROM system.one AS __table6 ) ) AS __table2 USING (id) diff --git a/tests/queries/0_stateless/03171_function_to_subcolumns_fuzzer.reference b/tests/queries/0_stateless/03171_function_to_subcolumns_fuzzer.reference new file mode 100644 index 00000000000..be47c4ab571 --- /dev/null +++ b/tests/queries/0_stateless/03171_function_to_subcolumns_fuzzer.reference @@ -0,0 +1,3 @@ +1 +2 1 +3 0 diff --git a/tests/queries/0_stateless/03171_function_to_subcolumns_fuzzer.sql 
b/tests/queries/0_stateless/03171_function_to_subcolumns_fuzzer.sql new file mode 100644 index 00000000000..587288bbfdf --- /dev/null +++ b/tests/queries/0_stateless/03171_function_to_subcolumns_fuzzer.sql @@ -0,0 +1,39 @@ +SET optimize_functions_to_subcolumns = 1; +SET allow_experimental_analyzer = 1; + +DROP TABLE IF EXISTS t_func_to_subcolumns_map_2; + +CREATE TABLE t_func_to_subcolumns_map_2 (id UInt64, m Map(String, UInt64)) ENGINE = MergeTree ORDER BY id; + +INSERT INTO t_func_to_subcolumns_map_2 VALUES (1, map('aaa', 1, 'bbb', 2)) (2, map('ccc', 3)); + +SELECT sum(mapContains(m, toNullable('aaa'))) FROM t_func_to_subcolumns_map_2; + +DROP TABLE t_func_to_subcolumns_map_2; + +DROP TABLE IF EXISTS t_func_to_subcolumns_join; + +CREATE TABLE t_func_to_subcolumns_join (id UInt64, arr Array(UInt64), n Nullable(String), m Map(String, UInt64)) +ENGINE = MergeTree ORDER BY tuple(); + +INSERT INTO t_func_to_subcolumns_join VALUES (1, [1, 2, 3], 'abc', map('foo', 1, 'bar', 2)) (2, [], NULL, map()); + +SET join_use_nulls = 1; + +SELECT + id, + right.n IS NULL +FROM t_func_to_subcolumns_join AS left +FULL OUTER JOIN +( + SELECT + 1 AS id, + 'qqq' AS n + UNION ALL + SELECT + 3 AS id, + 'www' +) AS right USING (id) +WHERE empty(arr); + +DROP TABLE t_func_to_subcolumns_join; From 0da1bb3f049f6bcc76dee0821c5dcc74f2dd55b2 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Wed, 12 Jun 2024 22:40:54 +0000 Subject: [PATCH 031/140] fix typo --- src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp b/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp index 9cfd22cbef5..bc2028e1b43 100644 --- a/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp +++ b/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp @@ -283,7 +283,7 @@ public: if (has_join_use_nulls) { /// Do not optimize if we have JOIN with setting join_use_null. - /// It may change the behaviour if subcolumn can be coverted + /// It may change the behaviour if subcolumn can be converted /// to nullable while the original column cannot. 
return {}; } From 6d3836aca49f5e006db213fc0bf728b8c12d151d Mon Sep 17 00:00:00 2001 From: lgbo-ustc Date: Mon, 27 May 2024 08:59:21 +0800 Subject: [PATCH 032/140] refactor hash join to reduce target size --- src/CMakeLists.txt | 1 + src/Functions/FunctionJoinGet.cpp | 2 +- src/Interpreters/ConcurrentHashJoin.h | 2 +- src/Interpreters/ExpressionAnalyzer.cpp | 2 +- src/Interpreters/GraceHashJoin.cpp | 2 +- src/Interpreters/HashJoin/AddedColumns.cpp | 138 ++ src/Interpreters/HashJoin/AddedColumns.h | 226 ++++ src/Interpreters/HashJoin/FullHashJoin.cpp | 11 + src/Interpreters/HashJoin/HashJoin.cpp | 1328 +++++++++++++++++++ src/Interpreters/{ => HashJoin}/HashJoin.h | 60 +- src/Interpreters/HashJoin/HashJoinMethods.h | 956 +++++++++++++ src/Interpreters/HashJoin/InnerHashJoin.cpp | 12 + src/Interpreters/HashJoin/JoinFeatures.h | 28 + src/Interpreters/HashJoin/JoinUsedFlags.h | 154 +++ src/Interpreters/HashJoin/KeyGetter.h | 73 + src/Interpreters/HashJoin/KnowRowsHolder.h | 148 +++ src/Interpreters/HashJoin/LeftHashJoin.cpp | 11 + src/Interpreters/HashJoin/RightHashJoin.cpp | 11 + src/Interpreters/JoinSwitcher.cpp | 2 +- src/Interpreters/joinDispatch.h | 2 +- src/Planner/PlannerJoinTree.cpp | 2 +- src/Planner/PlannerJoins.cpp | 2 +- src/Storages/StorageJoin.cpp | 2 +- 23 files changed, 3117 insertions(+), 58 deletions(-) create mode 100644 src/Interpreters/HashJoin/AddedColumns.cpp create mode 100644 src/Interpreters/HashJoin/AddedColumns.h create mode 100644 src/Interpreters/HashJoin/FullHashJoin.cpp create mode 100644 src/Interpreters/HashJoin/HashJoin.cpp rename src/Interpreters/{ => HashJoin}/HashJoin.h (91%) create mode 100644 src/Interpreters/HashJoin/HashJoinMethods.h create mode 100644 src/Interpreters/HashJoin/InnerHashJoin.cpp create mode 100644 src/Interpreters/HashJoin/JoinFeatures.h create mode 100644 src/Interpreters/HashJoin/JoinUsedFlags.h create mode 100644 src/Interpreters/HashJoin/KeyGetter.h create mode 100644 src/Interpreters/HashJoin/KnowRowsHolder.h create mode 100644 src/Interpreters/HashJoin/LeftHashJoin.cpp create mode 100644 src/Interpreters/HashJoin/RightHashJoin.cpp diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 84aaec17a5b..eea9c40e914 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -210,6 +210,7 @@ add_object_library(clickhouse_analyzer_passes Analyzer/Resolve) add_object_library(clickhouse_planner Planner) add_object_library(clickhouse_interpreters Interpreters) add_object_library(clickhouse_interpreters_cache Interpreters/Cache) +add_object_library(clickhouse_interpreters_hash_join Interpreters/HashJoin) add_object_library(clickhouse_interpreters_access Interpreters/Access) add_object_library(clickhouse_interpreters_mysql Interpreters/MySQL) add_object_library(clickhouse_interpreters_clusterproxy Interpreters/ClusterProxy) diff --git a/src/Functions/FunctionJoinGet.cpp b/src/Functions/FunctionJoinGet.cpp index 085c4db3f57..0a2859fe864 100644 --- a/src/Functions/FunctionJoinGet.cpp +++ b/src/Functions/FunctionJoinGet.cpp @@ -5,7 +5,7 @@ #include #include #include -#include +#include #include #include diff --git a/src/Interpreters/ConcurrentHashJoin.h b/src/Interpreters/ConcurrentHashJoin.h index c797ff27ece..efac648214e 100644 --- a/src/Interpreters/ConcurrentHashJoin.h +++ b/src/Interpreters/ConcurrentHashJoin.h @@ -5,7 +5,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index d80d5cd5b93..2123706d972 100644 --- 
a/src/Interpreters/ExpressionAnalyzer.cpp
+++ b/src/Interpreters/ExpressionAnalyzer.cpp
@@ -24,7 +24,7 @@
 #include
 #include
 #include
-#include <Interpreters/HashJoin.h>
+#include <Interpreters/HashJoin/HashJoin.h>
 #include
 #include
 #include
diff --git a/src/Interpreters/GraceHashJoin.cpp b/src/Interpreters/GraceHashJoin.cpp
index 4dd2f89b90a..6970048269b 100644
--- a/src/Interpreters/GraceHashJoin.cpp
+++ b/src/Interpreters/GraceHashJoin.cpp
@@ -3,7 +3,7 @@
 #include
 #include
 #include
-#include <Interpreters/HashJoin.h>
+#include <Interpreters/HashJoin/HashJoin.h>
 #include
 #include
 #include
diff --git a/src/Interpreters/HashJoin/AddedColumns.cpp b/src/Interpreters/HashJoin/AddedColumns.cpp
new file mode 100644
index 00000000000..930a352744d
--- /dev/null
+++ b/src/Interpreters/HashJoin/AddedColumns.cpp
@@ -0,0 +1,138 @@
+#include <Interpreters/HashJoin/AddedColumns.h>
+#include <Interpreters/HashJoin/HashJoin.h>
+
+namespace DB
+{
+JoinOnKeyColumns::JoinOnKeyColumns(const Block & block, const Names & key_names_, const String & cond_column_name, const Sizes & key_sizes_)
+    : key_names(key_names_)
+    , materialized_keys_holder(JoinCommon::materializeColumns(
+          block, key_names)) /// Rare case, when keys are constant or low cardinality. To avoid code bloat, simply materialize them.
+    , key_columns(JoinCommon::getRawPointers(materialized_keys_holder))
+    , null_map(nullptr)
+    , null_map_holder(extractNestedColumnsAndNullMap(key_columns, null_map))
+    , join_mask_column(JoinCommon::getColumnAsMask(block, cond_column_name))
+    , key_sizes(key_sizes_)
+{
+}
+
+template<> void AddedColumns<false>::buildOutput()
+{
+}
+
+template<>
+void AddedColumns<true>::buildOutput()
+{
+    for (size_t i = 0; i < this->size(); ++i)
+    {
+        auto & col = columns[i];
+        size_t default_count = 0;
+        auto apply_default = [&]()
+        {
+            if (default_count > 0)
+            {
+                JoinCommon::addDefaultValues(*col, type_name[i].type, default_count);
+                default_count = 0;
+            }
+        };
+
+        for (size_t j = 0; j < lazy_output.blocks.size(); ++j)
+        {
+            if (!lazy_output.blocks[j])
+            {
+                default_count++;
+                continue;
+            }
+            apply_default();
+            const auto & column_from_block = reinterpret_cast<const Block *>(lazy_output.blocks[j])->getByPosition(right_indexes[i]);
+            /// If it's joinGetOrNull, we need to wrap not-nullable columns in StorageJoin.
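+            /// joinGetOrNull declares a Nullable result type even when the column stored in
+            /// StorageJoin is plain, so such values are inserted through the ColumnNullable
+            /// wrapper (insertFromNotNullable) rather than a plain insertFrom.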
+            if (is_join_get)
+            {
+                if (auto * nullable_col = typeid_cast<ColumnNullable *>(col.get());
+                    nullable_col && !column_from_block.column->isNullable())
+                {
+                    nullable_col->insertFromNotNullable(*column_from_block.column, lazy_output.row_nums[j]);
+                    continue;
+                }
+            }
+            col->insertFrom(*column_from_block.column, lazy_output.row_nums[j]);
+        }
+        apply_default();
+    }
+}
+
+template<>
+void AddedColumns<false>::applyLazyDefaults()
+{
+    if (lazy_defaults_count)
+    {
+        for (size_t j = 0, size = right_indexes.size(); j < size; ++j)
+            JoinCommon::addDefaultValues(*columns[j], type_name[j].type, lazy_defaults_count);
+        lazy_defaults_count = 0;
+    }
+}
+
+template<>
+void AddedColumns<true>::applyLazyDefaults()
+{
+}
+
+template <>
+void AddedColumns<false>::appendFromBlock(const Block & block, size_t row_num, const bool has_defaults)
+{
+    if (has_defaults)
+        applyLazyDefaults();
+
+#ifndef NDEBUG
+    checkBlock(block);
+#endif
+    if (is_join_get)
+    {
+        size_t right_indexes_size = right_indexes.size();
+        for (size_t j = 0; j < right_indexes_size; ++j)
+        {
+            const auto & column_from_block = block.getByPosition(right_indexes[j]);
+            if (auto * nullable_col = nullable_column_ptrs[j])
+                nullable_col->insertFromNotNullable(*column_from_block.column, row_num);
+            else
+                columns[j]->insertFrom(*column_from_block.column, row_num);
+        }
+    }
+    else
+    {
+        size_t right_indexes_size = right_indexes.size();
+        for (size_t j = 0; j < right_indexes_size; ++j)
+        {
+            const auto & column_from_block = block.getByPosition(right_indexes[j]);
+            columns[j]->insertFrom(*column_from_block.column, row_num);
+        }
+    }
+}
+
+template <>
+void AddedColumns<true>::appendFromBlock(const Block & block, size_t row_num, bool)
+{
+#ifndef NDEBUG
+    checkBlock(block);
+#endif
+    if (has_columns_to_add)
+    {
+        lazy_output.blocks.emplace_back(reinterpret_cast<UInt64>(&block));
+        lazy_output.row_nums.emplace_back(static_cast<UInt32>(row_num));
+    }
+}
+template<>
+void AddedColumns<false>::appendDefaultRow()
+{
+    ++lazy_defaults_count;
+}
+
+template<>
+void AddedColumns<true>::appendDefaultRow()
+{
+    if (has_columns_to_add)
+    {
+        lazy_output.blocks.emplace_back(0);
+        lazy_output.row_nums.emplace_back(0);
+    }
+}
+}
diff --git a/src/Interpreters/HashJoin/AddedColumns.h b/src/Interpreters/HashJoin/AddedColumns.h
new file mode 100644
index 00000000000..13a7df6f498
--- /dev/null
+++ b/src/Interpreters/HashJoin/AddedColumns.h
@@ -0,0 +1,226 @@
+#pragma once
+#include <Interpreters/HashJoin/HashJoin.h>
+#include <Interpreters/TableJoin.h>
+
+namespace DB
+{
+namespace ErrorCodes
+{
+    extern const int LOGICAL_ERROR;
+}
+
+class ExpressionActions;
+using ExpressionActionsPtr = std::shared_ptr<ExpressionActions>;
+
+struct JoinOnKeyColumns
+{
+    Names key_names;
+
+    Columns materialized_keys_holder;
+    ColumnRawPtrs key_columns;
+
+    ConstNullMapPtr null_map;
+    ColumnPtr null_map_holder;
+
+    /// Only rows where mask == true can be joined
+    JoinCommon::JoinMask join_mask_column;
+
+    Sizes key_sizes;
+
+    explicit JoinOnKeyColumns(const Block & block, const Names & key_names_, const String & cond_column_name, const Sizes & key_sizes_);
+
+    bool isRowFiltered(size_t i) const { return join_mask_column.isRowFiltered(i); }
+};
+
+template <bool lazy>
+class AddedColumns
+{
+public:
+    struct TypeAndName
+    {
+        DataTypePtr type;
+        String name;
+        String qualified_name;
+
+        TypeAndName(DataTypePtr type_, const String & name_, const String & qualified_name_)
+            : type(type_), name(name_), qualified_name(qualified_name_)
+        {
+        }
+    };
+
+    struct LazyOutput
+    {
+        PaddedPODArray<UInt64> blocks;
+        PaddedPODArray<UInt32> row_nums;
+    };
+
+    AddedColumns(
+        const Block & left_block_,
+        const Block & block_with_columns_to_add,
+        const Block & saved_block_sample,
const HashJoin & join, + std::vector && join_on_keys_, + ExpressionActionsPtr additional_filter_expression_, + bool is_asof_join, + bool is_join_get_) + : left_block(left_block_) + , join_on_keys(join_on_keys_) + , additional_filter_expression(additional_filter_expression_) + , rows_to_add(left_block.rows()) + , is_join_get(is_join_get_) + { + size_t num_columns_to_add = block_with_columns_to_add.columns(); + if (is_asof_join) + ++num_columns_to_add; + + if constexpr (lazy) + { + has_columns_to_add = num_columns_to_add > 0; + lazy_output.blocks.reserve(rows_to_add); + lazy_output.row_nums.reserve(rows_to_add); + } + + columns.reserve(num_columns_to_add); + type_name.reserve(num_columns_to_add); + right_indexes.reserve(num_columns_to_add); + + for (const auto & src_column : block_with_columns_to_add) + { + /// Column names `src_column.name` and `qualified_name` can differ for StorageJoin, + /// because it uses not qualified right block column names + auto qualified_name = join.getTableJoin().renamedRightColumnName(src_column.name); + /// Don't insert column if it's in left block + if (!left_block.has(qualified_name)) + addColumn(src_column, qualified_name); + } + + if (is_asof_join) + { + assert(join_on_keys.size() == 1); + const ColumnWithTypeAndName & right_asof_column = join.rightAsofKeyColumn(); + addColumn(right_asof_column, right_asof_column.name); + left_asof_key = join_on_keys[0].key_columns.back(); + } + + for (auto & tn : type_name) + right_indexes.push_back(saved_block_sample.getPositionByName(tn.name)); + + nullable_column_ptrs.resize(right_indexes.size(), nullptr); + for (size_t j = 0; j < right_indexes.size(); ++j) + { + /** If it's joinGetOrNull, we will have nullable columns in result block + * even if right column is not nullable in storage (saved_block_sample). 
+ */ + const auto & saved_column = saved_block_sample.getByPosition(right_indexes[j]).column; + if (columns[j]->isNullable() && !saved_column->isNullable()) + nullable_column_ptrs[j] = typeid_cast(columns[j].get()); + } + } + + size_t size() const { return columns.size(); } + + void buildOutput(); + + ColumnWithTypeAndName moveColumn(size_t i) + { + return ColumnWithTypeAndName(std::move(columns[i]), type_name[i].type, type_name[i].qualified_name); + } + + void appendFromBlock(const Block & block, size_t row_num, bool has_default); + + void appendDefaultRow(); + + void applyLazyDefaults(); + + const IColumn & leftAsofKey() const { return *left_asof_key; } + + Block left_block; + std::vector join_on_keys; + ExpressionActionsPtr additional_filter_expression; + + size_t max_joined_block_rows = 0; + size_t rows_to_add; + std::unique_ptr offsets_to_replicate; + bool need_filter = false; + IColumn::Filter filter; + + void reserve(bool need_replicate) + { + if (!max_joined_block_rows) + return; + + /// Do not allow big allocations when user set max_joined_block_rows to huge value + size_t reserve_size = std::min(max_joined_block_rows, DEFAULT_BLOCK_SIZE * 2); + + if (need_replicate) + /// Reserve 10% more space for columns, because some rows can be repeated + reserve_size = static_cast(1.1 * reserve_size); + + for (auto & column : columns) + column->reserve(reserve_size); + } + +private: + + void checkBlock(const Block & block) + { + for (size_t j = 0; j < right_indexes.size(); ++j) + { + const auto * column_from_block = block.getByPosition(right_indexes[j]).column.get(); + const auto * dest_column = columns[j].get(); + if (auto * nullable_col = nullable_column_ptrs[j]) + { + if (!is_join_get) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Columns {} and {} can have different nullability only in joinGetOrNull", + dest_column->getName(), column_from_block->getName()); + dest_column = nullable_col->getNestedColumnPtr().get(); + } + /** Using dest_column->structureEquals(*column_from_block) will not work for low cardinality columns, + * because dictionaries can be different, while calling insertFrom on them is safe, for example: + * ColumnLowCardinality(size = 0, UInt8(size = 0), ColumnUnique(size = 1, String(size = 1))) + * and + * ColumnLowCardinality(size = 0, UInt16(size = 0), ColumnUnique(size = 1, String(size = 1))) + */ + if (typeid(*dest_column) != typeid(*column_from_block)) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Columns {} and {} have different types {} and {}", + dest_column->getName(), column_from_block->getName(), + demangle(typeid(*dest_column).name()), demangle(typeid(*column_from_block).name())); + } + } + + MutableColumns columns; + bool is_join_get; + std::vector right_indexes; + std::vector type_name; + std::vector nullable_column_ptrs; + size_t lazy_defaults_count = 0; + + /// for lazy + // The default row is represented by an empty RowRef, so that fixed-size blocks can be generated sequentially, + // default_count cannot represent the position of the row + LazyOutput lazy_output; + bool has_columns_to_add; + + /// for ASOF + const IColumn * left_asof_key = nullptr; + + + void addColumn(const ColumnWithTypeAndName & src_column, const std::string & qualified_name) + { + columns.push_back(src_column.column->cloneEmpty()); + columns.back()->reserve(src_column.column->size()); + type_name.emplace_back(src_column.type, src_column.name, qualified_name); + } +}; + +/// Adapter class to pass into addFoundRowAll +/// In joinRightColumnsWithAdditionalFilter we don't want to add 
rows directly into AddedColumns, +/// because they need to be filtered by additional_filter_expression. +class PreSelectedRows : public std::vector +{ +public: + void appendFromBlock(const Block & block, size_t row_num, bool /* has_default */) { this->emplace_back(&block, row_num); } +}; + +} diff --git a/src/Interpreters/HashJoin/FullHashJoin.cpp b/src/Interpreters/HashJoin/FullHashJoin.cpp new file mode 100644 index 00000000000..5d058d10fc2 --- /dev/null +++ b/src/Interpreters/HashJoin/FullHashJoin.cpp @@ -0,0 +1,11 @@ +#include + +namespace DB +{ +template class HashJoinMethods; +template class HashJoinMethods; +template class HashJoinMethods; +template class HashJoinMethods; +template class HashJoinMethods; +template class HashJoinMethods; +} diff --git a/src/Interpreters/HashJoin/HashJoin.cpp b/src/Interpreters/HashJoin/HashJoin.cpp new file mode 100644 index 00000000000..c8ef2d41ade --- /dev/null +++ b/src/Interpreters/HashJoin/HashJoin.cpp @@ -0,0 +1,1328 @@ +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + + +#include +#include + +namespace CurrentMetrics +{ + extern const Metric TemporaryFilesForJoin; +} + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int NOT_IMPLEMENTED; + extern const int NO_SUCH_COLUMN_IN_TABLE; + extern const int INCOMPATIBLE_TYPE_OF_JOIN; + extern const int UNSUPPORTED_JOIN_KEYS; + extern const int LOGICAL_ERROR; + extern const int SYNTAX_ERROR; + extern const int SET_SIZE_LIMIT_EXCEEDED; + extern const int TYPE_MISMATCH; + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + extern const int INVALID_JOIN_ON_EXPRESSION; +} + +namespace +{ + +struct NotProcessedCrossJoin : public ExtraBlock +{ + size_t left_position; + size_t right_block; + std::unique_ptr reader; +}; + + +Int64 getCurrentQueryMemoryUsage() +{ + /// Use query-level memory tracker + if (auto * memory_tracker_child = CurrentThread::getMemoryTracker()) + if (auto * memory_tracker = memory_tracker_child->getParent()) + return memory_tracker->get(); + return 0; +} + +} + +static void correctNullabilityInplace(ColumnWithTypeAndName & column, bool nullable) +{ + if (nullable) + { + JoinCommon::convertColumnToNullable(column); + } + else + { + /// We have to replace values masked by NULLs with defaults. + if (column.column) + if (const auto * nullable_column = checkAndGetColumn(&*column.column)) + column.column = JoinCommon::filterWithBlanks(column.column, nullable_column->getNullMapColumn().getData(), true); + + JoinCommon::removeColumnNullability(column); + } +} + +HashJoin::HashJoin(std::shared_ptr table_join_, const Block & right_sample_block_, + bool any_take_last_row_, size_t reserve_num_, const String & instance_id_) + : table_join(table_join_) + , kind(table_join->kind()) + , strictness(table_join->strictness()) + , any_take_last_row(any_take_last_row_) + , reserve_num(reserve_num_) + , instance_id(instance_id_) + , asof_inequality(table_join->getAsofInequality()) + , data(std::make_shared()) + , tmp_data( + table_join_->getTempDataOnDisk() + ? std::make_unique(table_join_->getTempDataOnDisk(), CurrentMetrics::TemporaryFilesForJoin) + : nullptr) + , right_sample_block(right_sample_block_) + , max_joined_block_rows(table_join->maxJoinedBlockRows()) + , instance_log_id(!instance_id_.empty() ? 
"(" + instance_id_ + ") " : "") + , log(getLogger("HashJoin")) +{ + LOG_TRACE(log, "{}Keys: {}, datatype: {}, kind: {}, strictness: {}, right header: {}", + instance_log_id, TableJoin::formatClauses(table_join->getClauses(), true), data->type, kind, strictness, right_sample_block.dumpStructure()); + + validateAdditionalFilterExpression(table_join->getMixedJoinExpression()); + + used_flags = std::make_unique(); + + if (isCrossOrComma(kind)) + { + data->type = Type::CROSS; + sample_block_with_columns_to_add = right_sample_block; + } + else if (table_join->getClauses().empty()) + { + data->type = Type::EMPTY; + /// We might need to insert default values into the right columns, materialize them + sample_block_with_columns_to_add = materializeBlock(right_sample_block); + } + else if (table_join->oneDisjunct()) + { + const auto & key_names_right = table_join->getOnlyClause().key_names_right; + JoinCommon::splitAdditionalColumns(key_names_right, right_sample_block, right_table_keys, sample_block_with_columns_to_add); + required_right_keys = table_join->getRequiredRightKeys(right_table_keys, required_right_keys_sources); + } + else + { + /// required right keys concept does not work well if multiple disjuncts, we need all keys + sample_block_with_columns_to_add = right_table_keys = materializeBlock(right_sample_block); + } + + materializeBlockInplace(right_table_keys); + initRightBlockStructure(data->sample_block); + data->sample_block = prepareRightBlock(data->sample_block); + + JoinCommon::createMissedColumns(sample_block_with_columns_to_add); + + size_t disjuncts_num = table_join->getClauses().size(); + data->maps.resize(disjuncts_num); + key_sizes.reserve(disjuncts_num); + + for (const auto & clause : table_join->getClauses()) + { + const auto & key_names_right = clause.key_names_right; + ColumnRawPtrs key_columns = JoinCommon::extractKeysForJoin(right_table_keys, key_names_right); + + if (strictness == JoinStrictness::Asof) + { + assert(disjuncts_num == 1); + + /// @note ASOF JOIN is not INNER. It's better avoid use of 'INNER ASOF' combination in messages. + /// In fact INNER means 'LEFT SEMI ASOF' while LEFT means 'LEFT OUTER ASOF'. + if (!isLeft(kind) && !isInner(kind)) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Wrong ASOF JOIN type. Only ASOF and LEFT ASOF joins are supported"); + + if (key_columns.size() <= 1) + throw Exception(ErrorCodes::SYNTAX_ERROR, "ASOF join needs at least one equi-join column"); + + size_t asof_size; + asof_type = SortedLookupVectorBase::getTypeSize(*key_columns.back(), asof_size); + key_columns.pop_back(); + + /// this is going to set up the appropriate hash table for the direct lookup part of the join + /// However, this does not depend on the size of the asof join key (as that goes into the BST) + /// Therefore, add it back in such that it can be extracted appropriately from the full stored + /// key_columns and key_sizes + auto & asof_key_sizes = key_sizes.emplace_back(); + data->type = chooseMethod(kind, key_columns, asof_key_sizes); + asof_key_sizes.push_back(asof_size); + } + else + { + /// Choose data structure to use for JOIN. 
+            auto current_join_method = chooseMethod(kind, key_columns, key_sizes.emplace_back());
+            if (data->type == Type::EMPTY)
+                data->type = current_join_method;
+            else if (data->type != current_join_method)
+                data->type = Type::hashed;
+        }
+    }
+
+    for (auto & maps : data->maps)
+        dataMapInit(maps);
+}
+
+HashJoin::Type HashJoin::chooseMethod(JoinKind kind, const ColumnRawPtrs & key_columns, Sizes & key_sizes)
+{
+    size_t keys_size = key_columns.size();
+
+    if (keys_size == 0)
+    {
+        if (isCrossOrComma(kind))
+            return Type::CROSS;
+        return Type::EMPTY;
+    }
+
+    bool all_fixed = true;
+    size_t keys_bytes = 0;
+    key_sizes.resize(keys_size);
+    for (size_t j = 0; j < keys_size; ++j)
+    {
+        if (!key_columns[j]->isFixedAndContiguous())
+        {
+            all_fixed = false;
+            break;
+        }
+        key_sizes[j] = key_columns[j]->sizeOfValueIfFixed();
+        keys_bytes += key_sizes[j];
+    }
+
+    /// If there is one numeric key of fixed size (up to 256 bits)
+    if (keys_size == 1 && key_columns[0]->isNumeric())
+    {
+        size_t size_of_field = key_columns[0]->sizeOfValueIfFixed();
+        if (size_of_field == 1)
+            return Type::key8;
+        if (size_of_field == 2)
+            return Type::key16;
+        if (size_of_field == 4)
+            return Type::key32;
+        if (size_of_field == 8)
+            return Type::key64;
+        if (size_of_field == 16)
+            return Type::keys128;
+        if (size_of_field == 32)
+            return Type::keys256;
+        throw Exception(ErrorCodes::LOGICAL_ERROR, "Numeric column has sizeOfField not in 1, 2, 4, 8, 16, 32.");
+    }
+
+    /// If the keys fit in N bits, we will use a hash table for N-bit-packed keys
+    if (all_fixed && keys_bytes <= 16)
+        return Type::keys128;
+    if (all_fixed && keys_bytes <= 32)
+        return Type::keys256;
+
+    /// If there is a single string key, use a hash table of its values.
+    if (keys_size == 1)
+    {
+        auto is_string_column = [](const IColumn * column_ptr) -> bool
+        {
+            if (const auto * lc_column_ptr = typeid_cast<const ColumnLowCardinality *>(column_ptr))
+                return typeid_cast<const ColumnString *>(lc_column_ptr->getDictionary().getNestedColumn().get());
+            return typeid_cast<const ColumnString *>(column_ptr);
+        };
+
+        const auto * key_column = key_columns[0];
+        if (is_string_column(key_column) ||
+            (isColumnConst(*key_column) && is_string_column(assert_cast<const ColumnConst *>(key_column)->getDataColumnPtr().get())))
+            return Type::key_string;
+    }
+
+    if (keys_size == 1 && typeid_cast<const ColumnFixedString *>(key_columns[0]))
+        return Type::key_fixed_string;
+
+    /// Otherwise, use a set of cryptographic hashes of unambiguously serialized values.
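// A few illustrative outcomes of the dispatch above, derived from the rules in this
// function (hypothetical key layouts, not an exhaustive list):
//   one UInt64 key                       -> Type::key8/16/32/64 by width, here key64
//   UInt32 + UInt8 keys (5 fixed bytes)  -> Type::keys128 (packed fixed keys <= 16 bytes)
//   one String key                       -> Type::key_string
//   String + UInt8 keys                  -> Type::hashed (not all keys fixed-size)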
+ return Type::hashed; +} + +template +static KeyGetter createKeyGetter(const ColumnRawPtrs & key_columns, const Sizes & key_sizes) +{ + if constexpr (is_asof_join) + { + auto key_column_copy = key_columns; + auto key_size_copy = key_sizes; + key_column_copy.pop_back(); + key_size_copy.pop_back(); + return KeyGetter(key_column_copy, key_size_copy, nullptr); + } + else + return KeyGetter(key_columns, key_sizes, nullptr); +} + +void HashJoin::dataMapInit(MapsVariant & map) +{ + if (kind == JoinKind::Cross) + return; + joinDispatchInit(kind, strictness, map); + joinDispatch(kind, strictness, map, [&](auto, auto, auto & map_) { map_.create(data->type); }); + + if (reserve_num) + { + joinDispatch(kind, strictness, map, [&](auto, auto, auto & map_) { map_.reserve(data->type, reserve_num); }); + } + + if (!data) + throw Exception(ErrorCodes::LOGICAL_ERROR, "HashJoin::dataMapInit called with empty data"); +} + +bool HashJoin::empty() const +{ + return data->type == Type::EMPTY; +} + +bool HashJoin::alwaysReturnsEmptySet() const +{ + return isInnerOrRight(getKind()) && data->empty; +} + +size_t HashJoin::getTotalRowCount() const +{ + if (!data) + return 0; + + size_t res = 0; + + if (data->type == Type::CROSS) + { + for (const auto & block : data->blocks) + res += block.rows(); + } + else + { + for (const auto & map : data->maps) + { + joinDispatch(kind, strictness, map, [&](auto, auto, auto & map_) { res += map_.getTotalRowCount(data->type); }); + } + } + + return res; +} + +size_t HashJoin::getTotalByteCount() const +{ + if (!data) + return 0; + +#ifndef NDEBUG + size_t debug_blocks_allocated_size = 0; + for (const auto & block : data->blocks) + debug_blocks_allocated_size += block.allocatedBytes(); + + if (data->blocks_allocated_size != debug_blocks_allocated_size) + throw Exception(ErrorCodes::LOGICAL_ERROR, "data->blocks_allocated_size != debug_blocks_allocated_size ({} != {})", + data->blocks_allocated_size, debug_blocks_allocated_size); + + size_t debug_blocks_nullmaps_allocated_size = 0; + for (const auto & nullmap : data->blocks_nullmaps) + debug_blocks_nullmaps_allocated_size += nullmap.second->allocatedBytes(); + + if (data->blocks_nullmaps_allocated_size != debug_blocks_nullmaps_allocated_size) + throw Exception(ErrorCodes::LOGICAL_ERROR, "data->blocks_nullmaps_allocated_size != debug_blocks_nullmaps_allocated_size ({} != {})", + data->blocks_nullmaps_allocated_size, debug_blocks_nullmaps_allocated_size); +#endif + + size_t res = 0; + + res += data->blocks_allocated_size; + res += data->blocks_nullmaps_allocated_size; + res += data->pool.allocatedBytes(); + + if (data->type != Type::CROSS) + { + for (const auto & map : data->maps) + { + joinDispatch(kind, strictness, map, [&](auto, auto, auto & map_) { res += map_.getTotalByteCountImpl(data->type); }); + } + } + return res; +} + +void HashJoin::initRightBlockStructure(Block & saved_block_sample) +{ + if (isCrossOrComma(kind)) + { + /// cross join doesn't have keys, just add all columns + saved_block_sample = sample_block_with_columns_to_add.cloneEmpty(); + return; + } + + bool multiple_disjuncts = !table_join->oneDisjunct(); + /// We could remove key columns for LEFT | INNER HashJoin but we should keep them for JoinSwitcher (if any). 
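// Rationale for the conditions below: keys must be kept when the algorithm may change
// midway (AUTO can switch algorithms via JoinSwitcher, GRACE_HASH re-reads buckets),
// when non-joined rows must be emitted (RIGHT/FULL), when there are multiple disjuncts,
// or when a residual ON expression may reference right-side key columns.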
+    bool save_key_columns = table_join->isEnabledAlgorithm(JoinAlgorithm::AUTO) ||
+        table_join->isEnabledAlgorithm(JoinAlgorithm::GRACE_HASH) ||
+        isRightOrFull(kind) ||
+        multiple_disjuncts ||
+        table_join->getMixedJoinExpression();
+    if (save_key_columns)
+    {
+        saved_block_sample = right_table_keys.cloneEmpty();
+    }
+    else if (strictness == JoinStrictness::Asof)
+    {
+        /// Save ASOF key
+        saved_block_sample.insert(right_table_keys.safeGetByPosition(right_table_keys.columns() - 1));
+    }
+
+    /// Save non-key columns
+    for (auto & column : sample_block_with_columns_to_add)
+    {
+        if (auto * col = saved_block_sample.findByName(column.name))
+            *col = column;
+        else
+            saved_block_sample.insert(column);
+    }
+}
+
+Block HashJoin::prepareRightBlock(const Block & block, const Block & saved_block_sample_)
+{
+    Block structured_block;
+    for (const auto & sample_column : saved_block_sample_.getColumnsWithTypeAndName())
+    {
+        ColumnWithTypeAndName column = block.getByName(sample_column.name);
+
+        /// There's no optimization for right side const columns. Remove constness if any.
+        column.column = recursiveRemoveSparse(column.column->convertToFullColumnIfConst());
+
+        if (column.column->lowCardinality() && !sample_column.column->lowCardinality())
+        {
+            column.column = column.column->convertToFullColumnIfLowCardinality();
+            column.type = removeLowCardinality(column.type);
+        }
+
+        if (sample_column.column->isNullable())
+            JoinCommon::convertColumnToNullable(column);
+
+        structured_block.insert(std::move(column));
+    }
+
+    return structured_block;
+}
+
+Block HashJoin::prepareRightBlock(const Block & block) const
+{
+    return prepareRightBlock(block, savedBlockSample());
+}
+
+bool HashJoin::addBlockToJoin(const Block & source_block_, bool check_limits)
+{
+    if (!data)
+        throw Exception(ErrorCodes::LOGICAL_ERROR, "Join data was released");
+
+    /// RowRef::SizeT is uint32_t (not size_t) for hash table Cell memory efficiency.
+    /// It's possible to split bigger blocks and insert them by parts here, but it would be dead code.
+    if (unlikely(source_block_.rows() > std::numeric_limits<RowRef::SizeT>::max()))
+        throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Too many rows in right table block for HashJoin: {}", source_block_.rows());
+
+    /** We do not allocate memory for stored blocks inside HashJoin, only for the hash table.
+      * If all the blocks are allocated before the first `addBlockToJoin` call, query memory usage will already be quite high at that point.
+      * In that case the memory consumed by stored blocks will be underestimated.
+      */
+    if (!memory_usage_before_adding_blocks)
+        memory_usage_before_adding_blocks = getCurrentQueryMemoryUsage();
+
+    Block source_block = source_block_;
+    if (strictness == JoinStrictness::Asof)
+    {
+        chassert(kind == JoinKind::Left || kind == JoinKind::Inner);
+
+        /// Filter out rows with NULLs in the ASOF key: nulls are not joined with anything since they are not comparable.
+        /// We support only INNER/LEFT ASOF join, so rows with NULLs never return from the right joined table.
+        /// So filter them out here instead of handling them in the implementation.
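// A note on the filtering below: the combined null map is negated into a mask and passed
// to IColumn::filter, so only rows whose ASOF key is not NULL survive; the -1 argument is
// the result size hint (unknown in advance here).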
+ const auto & asof_key_name = table_join->getOnlyClause().key_names_right.back(); + auto & asof_column = source_block.getByName(asof_key_name); + + if (asof_column.type->isNullable()) + { + /// filter rows with nulls in asof key + if (const auto * asof_const_column = typeid_cast(asof_column.column.get())) + { + if (asof_const_column->isNullAt(0)) + return false; + } + else + { + const auto & asof_column_nullable = assert_cast(*asof_column.column).getNullMapData(); + + NullMap negative_null_map(asof_column_nullable.size()); + for (size_t i = 0; i < asof_column_nullable.size(); ++i) + negative_null_map[i] = !asof_column_nullable[i]; + + for (auto & column : source_block) + column.column = column.column->filter(negative_null_map, -1); + } + } + } + + size_t rows = source_block.rows(); + + const auto & right_key_names = table_join->getAllNames(JoinTableSide::Right); + ColumnPtrMap all_key_columns(right_key_names.size()); + for (const auto & column_name : right_key_names) + { + const auto & column = source_block.getByName(column_name).column; + all_key_columns[column_name] = recursiveRemoveSparse(column->convertToFullColumnIfConst())->convertToFullColumnIfLowCardinality(); + } + + Block block_to_save = prepareRightBlock(source_block); + if (shrink_blocks) + block_to_save = block_to_save.shrinkToFit(); + + size_t max_bytes_in_join = table_join->sizeLimits().max_bytes; + size_t max_rows_in_join = table_join->sizeLimits().max_rows; + + if (kind == JoinKind::Cross && tmp_data + && (tmp_stream || (max_bytes_in_join && getTotalByteCount() + block_to_save.allocatedBytes() >= max_bytes_in_join) + || (max_rows_in_join && getTotalRowCount() + block_to_save.rows() >= max_rows_in_join))) + { + if (tmp_stream == nullptr) + { + tmp_stream = &tmp_data->createStream(right_sample_block); + } + tmp_stream->write(block_to_save); + return true; + } + + size_t total_rows = 0; + size_t total_bytes = 0; + { + if (storage_join_lock) + throw DB::Exception(ErrorCodes::LOGICAL_ERROR, "addBlockToJoin called when HashJoin locked to prevent updates"); + + assertBlocksHaveEqualStructure(data->sample_block, block_to_save, "joined block"); + + size_t min_bytes_to_compress = table_join->crossJoinMinBytesToCompress(); + size_t min_rows_to_compress = table_join->crossJoinMinRowsToCompress(); + + if (kind == JoinKind::Cross + && ((min_bytes_to_compress && getTotalByteCount() >= min_bytes_to_compress) + || (min_rows_to_compress && getTotalRowCount() >= min_rows_to_compress))) + { + block_to_save = block_to_save.compress(); + } + + data->blocks_allocated_size += block_to_save.allocatedBytes(); + data->blocks.emplace_back(std::move(block_to_save)); + Block * stored_block = &data->blocks.back(); + + if (rows) + data->empty = false; + + bool flag_per_row = needUsedFlagsForPerRightTableRow(table_join); + const auto & onexprs = table_join->getClauses(); + for (size_t onexpr_idx = 0; onexpr_idx < onexprs.size(); ++onexpr_idx) + { + ColumnRawPtrs key_columns; + for (const auto & name : onexprs[onexpr_idx].key_names_right) + key_columns.push_back(all_key_columns[name].get()); + + /// We will insert to the map only keys, where all components are not NULL. 
+            ConstNullMapPtr null_map{};
+            ColumnPtr null_map_holder = extractNestedColumnsAndNullMap(key_columns, null_map);
+
+            /// If RIGHT or FULL, save blocks with nulls for NotJoinedBlocks
+            UInt8 save_nullmap = 0;
+            if (isRightOrFull(kind) && null_map)
+            {
+                /// Save rows with NULL keys
+                for (size_t i = 0; !save_nullmap && i < null_map->size(); ++i)
+                    save_nullmap |= (*null_map)[i];
+            }
+
+            auto join_mask_col = JoinCommon::getColumnAsMask(source_block, onexprs[onexpr_idx].condColumnNames().second);
+            /// Save blocks that do not hold conditions in ON section
+            ColumnUInt8::MutablePtr not_joined_map = nullptr;
+            if (!flag_per_row && isRightOrFull(kind) && join_mask_col.hasData())
+            {
+                const auto & join_mask = join_mask_col.getData();
+                /// Save rows that do not hold conditions
+                not_joined_map = ColumnUInt8::create(rows, 0);
+                for (size_t i = 0, sz = join_mask->size(); i < sz; ++i)
+                {
+                    /// Condition holds, do not save row
+                    if ((*join_mask)[i])
+                        continue;
+
+                    /// A row with a NULL key will be saved anyway (see above); do not save it twice
+                    if (save_nullmap && (*null_map)[i])
+                        continue;
+
+                    not_joined_map->getData()[i] = 1;
+                }
+            }
+
+            bool is_inserted = false;
+            if (kind != JoinKind::Cross)
+            {
+                joinDispatch(kind, strictness, data->maps[onexpr_idx], [&](auto kind_, auto strictness_, auto & map)
+                {
+                    size_t size = HashJoinMethods<kind_, strictness_, std::decay_t<decltype(map)>>::insertFromBlockImpl(
+                        *this,
+                        data->type,
+                        map,
+                        rows,
+                        key_columns,
+                        key_sizes[onexpr_idx],
+                        stored_block,
+                        null_map,
+                        join_mask_col.getData(),
+                        data->pool,
+                        is_inserted);
+
+                    if (flag_per_row)
+                        used_flags->reinit<kind_, strictness_>(stored_block);
+                    else if (is_inserted)
+                        /// Number of buckets + 1 value from zero storage
+                        used_flags->reinit<kind_, strictness_>(size + 1);
+                });
+            }
+
+            if (!flag_per_row && save_nullmap && is_inserted)
+            {
+                data->blocks_nullmaps_allocated_size += null_map_holder->allocatedBytes();
+                data->blocks_nullmaps.emplace_back(stored_block, null_map_holder);
+            }
+
+            if (!flag_per_row && not_joined_map && is_inserted)
+            {
+                data->blocks_nullmaps_allocated_size += not_joined_map->allocatedBytes();
+                data->blocks_nullmaps.emplace_back(stored_block, std::move(not_joined_map));
+            }
+
+            if (!flag_per_row && !is_inserted)
+            {
+                LOG_TRACE(log, "Skipping inserting block with {} rows", rows);
+                data->blocks_allocated_size -= stored_block->allocatedBytes();
+                data->blocks.pop_back();
+            }
+
+            if (!check_limits)
+                return true;
+
+            /// TODO: Do not calculate them every time
+            total_rows = getTotalRowCount();
+            total_bytes = getTotalByteCount();
+        }
+    }
+
+    shrinkStoredBlocksToFit(total_bytes);
+
+    return table_join->sizeLimits().check(total_rows, total_bytes, "JOIN", ErrorCodes::SET_SIZE_LIMIT_EXCEEDED);
+}
+
+void HashJoin::shrinkStoredBlocksToFit(size_t & total_bytes_in_join)
+{
+    if (shrink_blocks)
+        return; /// Already shrunk
+
+    Int64 current_memory_usage = getCurrentQueryMemoryUsage();
+    Int64 query_memory_usage_delta = current_memory_usage - memory_usage_before_adding_blocks;
+    Int64 max_total_bytes_for_query = memory_usage_before_adding_blocks ? table_join->getMaxMemoryUsage() : 0;
+
+    auto max_total_bytes_in_join = table_join->sizeLimits().max_bytes;
+
+    /** If the accounted data size exceeds half of `max_bytes_in_join`,
+      * or if the growth of query memory consumption since blocks were first added
+      * (an estimate, via the memory tracker, of the memory consumed by the join)
+      * is bigger than half of all memory available for the query,
+      * then shrink the stored blocks to fit.
+ */ + shrink_blocks = (max_total_bytes_in_join && total_bytes_in_join > max_total_bytes_in_join / 2) || + (max_total_bytes_for_query && query_memory_usage_delta > max_total_bytes_for_query / 2); + if (!shrink_blocks) + return; + + LOG_DEBUG(log, "Shrinking stored blocks, memory consumption is {} {} calculated by join, {} {} by memory tracker", + ReadableSize(total_bytes_in_join), max_total_bytes_in_join ? fmt::format("/ {}", ReadableSize(max_total_bytes_in_join)) : "", + ReadableSize(query_memory_usage_delta), max_total_bytes_for_query ? fmt::format("/ {}", ReadableSize(max_total_bytes_for_query)) : ""); + + for (auto & stored_block : data->blocks) + { + size_t old_size = stored_block.allocatedBytes(); + stored_block = stored_block.shrinkToFit(); + size_t new_size = stored_block.allocatedBytes(); + + if (old_size >= new_size) + { + if (data->blocks_allocated_size < old_size - new_size) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Blocks allocated size value is broken: " + "blocks_allocated_size = {}, old_size = {}, new_size = {}", + data->blocks_allocated_size, old_size, new_size); + + data->blocks_allocated_size -= old_size - new_size; + } + else + /// Sometimes after clone resized block can be bigger than original + data->blocks_allocated_size += new_size - old_size; + } + + auto new_total_bytes_in_join = getTotalByteCount(); + + Int64 new_current_memory_usage = getCurrentQueryMemoryUsage(); + + LOG_DEBUG(log, "Shrunk stored blocks {} freed ({} by memory tracker), new memory consumption is {} ({} by memory tracker)", + ReadableSize(total_bytes_in_join - new_total_bytes_in_join), ReadableSize(current_memory_usage - new_current_memory_usage), + ReadableSize(new_total_bytes_in_join), ReadableSize(new_current_memory_usage)); + + total_bytes_in_join = new_total_bytes_in_join; +} + +void HashJoin::joinBlockImplCross(Block & block, ExtraBlockPtr & not_processed) const +{ + size_t start_left_row = 0; + size_t start_right_block = 0; + std::unique_ptr reader = nullptr; + if (not_processed) + { + auto & continuation = static_cast(*not_processed); + start_left_row = continuation.left_position; + start_right_block = continuation.right_block; + reader = std::move(continuation.reader); + not_processed.reset(); + } + + size_t num_existing_columns = block.columns(); + size_t num_columns_to_add = sample_block_with_columns_to_add.columns(); + + ColumnRawPtrs src_left_columns; + MutableColumns dst_columns; + + { + src_left_columns.reserve(num_existing_columns); + dst_columns.reserve(num_existing_columns + num_columns_to_add); + + for (const ColumnWithTypeAndName & left_column : block) + { + src_left_columns.push_back(left_column.column.get()); + dst_columns.emplace_back(src_left_columns.back()->cloneEmpty()); + } + + for (const ColumnWithTypeAndName & right_column : sample_block_with_columns_to_add) + dst_columns.emplace_back(right_column.column->cloneEmpty()); + + for (auto & dst : dst_columns) + dst->reserve(max_joined_block_rows); + } + + size_t rows_left = block.rows(); + size_t rows_added = 0; + for (size_t left_row = start_left_row; left_row < rows_left; ++left_row) + { + size_t block_number = 0; + + auto process_right_block = [&](const Block & block_right) + { + size_t rows_right = block_right.rows(); + rows_added += rows_right; + + for (size_t col_num = 0; col_num < num_existing_columns; ++col_num) + dst_columns[col_num]->insertManyFrom(*src_left_columns[col_num], left_row, rows_right); + + for (size_t col_num = 0; col_num < num_columns_to_add; ++col_num) + { + const IColumn & column_right = 
*block_right.getByPosition(col_num).column; + dst_columns[num_existing_columns + col_num]->insertRangeFrom(column_right, 0, rows_right); + } + }; + + for (const Block & compressed_block_right : data->blocks) + { + ++block_number; + if (block_number < start_right_block) + continue; + + auto block_right = compressed_block_right.decompress(); + process_right_block(block_right); + if (rows_added > max_joined_block_rows) + { + break; + } + } + + if (tmp_stream && rows_added <= max_joined_block_rows) + { + if (reader == nullptr) + { + tmp_stream->finishWritingAsyncSafe(); + reader = tmp_stream->getReadStream(); + } + while (auto block_right = reader->read()) + { + ++block_number; + process_right_block(block_right); + if (rows_added > max_joined_block_rows) + { + break; + } + } + + /// It means, that reader->read() returned {} + if (rows_added <= max_joined_block_rows) + { + reader.reset(); + } + } + + start_right_block = 0; + + if (rows_added > max_joined_block_rows) + { + not_processed = std::make_shared( + NotProcessedCrossJoin{{block.cloneEmpty()}, left_row, block_number + 1, std::move(reader)}); + not_processed->block.swap(block); + break; + } + } + + for (const ColumnWithTypeAndName & src_column : sample_block_with_columns_to_add) + block.insert(src_column); + + block = block.cloneWithColumns(std::move(dst_columns)); +} + +DataTypePtr HashJoin::joinGetCheckAndGetReturnType(const DataTypes & data_types, const String & column_name, bool or_null) const +{ + size_t num_keys = data_types.size(); + if (right_table_keys.columns() != num_keys) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Number of arguments for function joinGet{} doesn't match: passed, should be equal to {}", + toString(or_null ? "OrNull" : ""), toString(num_keys)); + + for (size_t i = 0; i < num_keys; ++i) + { + const auto & left_type_origin = data_types[i]; + const auto & [c2, right_type_origin, right_name] = right_table_keys.safeGetByPosition(i); + auto left_type = removeNullable(recursiveRemoveLowCardinality(left_type_origin)); + auto right_type = removeNullable(recursiveRemoveLowCardinality(right_type_origin)); + if (!left_type->equals(*right_type)) + throw Exception(ErrorCodes::TYPE_MISMATCH, "Type mismatch in joinGet key {}: " + "found type {}, while the needed type is {}", i, left_type->getName(), right_type->getName()); + } + + if (!sample_block_with_columns_to_add.has(column_name)) + throw Exception(ErrorCodes::NO_SUCH_COLUMN_IN_TABLE, "StorageJoin doesn't contain column {}", column_name); + + auto elem = sample_block_with_columns_to_add.getByName(column_name); + if (or_null && JoinCommon::canBecomeNullable(elem.type)) + elem.type = makeNullable(elem.type); + return elem.type; +} + +/// TODO: return multiple columns as named tuple +/// TODO: return array of values when strictness == JoinStrictness::All +ColumnWithTypeAndName HashJoin::joinGet(const Block & block, const Block & block_with_columns_to_add) const +{ + bool is_valid = (strictness == JoinStrictness::Any || strictness == JoinStrictness::RightAny) + && kind == JoinKind::Left; + if (!is_valid) + throw Exception(ErrorCodes::INCOMPATIBLE_TYPE_OF_JOIN, "joinGet only supports StorageJoin of type Left Any"); + const auto & key_names_right = table_join->getOnlyClause().key_names_right; + + /// Assemble the key block with correct names. 
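// For reference, this code path backs the SQL functions joinGet/joinGetOrNull over a
// StorageJoin table, e.g. (hypothetical table and key):
//     CREATE TABLE dict_join (key UInt64, value String) ENGINE = Join(ANY, LEFT, key);
//     SELECT joinGet('dict_join', 'value', toUInt64(42));
// joinGetOrNull returns NULL instead of the type's default value when the key is missing.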
+ Block keys; + for (size_t i = 0; i < block.columns(); ++i) + { + auto key = block.getByPosition(i); + key.name = key_names_right[i]; + keys.insert(std::move(key)); + } + + static_assert(!MapGetter::flagged, + "joinGet are not protected from hash table changes between block processing"); + + std::vector maps_vector; + maps_vector.push_back(&std::get(data->maps[0])); + HashJoinMethods::joinBlockImpl(*this, keys, block_with_columns_to_add, maps_vector, /* is_join_get = */ true); + return keys.getByPosition(keys.columns() - 1); +} + +void HashJoin::checkTypesOfKeys(const Block & block) const +{ + for (const auto & onexpr : table_join->getClauses()) + { + JoinCommon::checkTypesOfKeys(block, onexpr.key_names_left, right_table_keys, onexpr.key_names_right); + } +} + +void HashJoin::joinBlock(Block & block, ExtraBlockPtr & not_processed) +{ + if (!data) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot join after data has been released"); + + for (const auto & onexpr : table_join->getClauses()) + { + auto cond_column_name = onexpr.condColumnNames(); + JoinCommon::checkTypesOfKeys( + block, onexpr.key_names_left, cond_column_name.first, + right_sample_block, onexpr.key_names_right, cond_column_name.second); + } + + if (kind == JoinKind::Cross) + { + joinBlockImplCross(block, not_processed); + return; + } + + if (kind == JoinKind::Right || kind == JoinKind::Full) + { + materializeBlockInplace(block); + } + + { + std::vectormaps[0])> * > maps_vector; + for (size_t i = 0; i < table_join->getClauses().size(); ++i) + maps_vector.push_back(&data->maps[i]); + + if (joinDispatch(kind, strictness, maps_vector, [&](auto kind_, auto strictness_, auto & maps_vector_) + { + using MapType = typename MapGetter::Map; + Block remaining_block = HashJoinMethods::joinBlockImpl( + *this, block, sample_block_with_columns_to_add, maps_vector_); + if (remaining_block.rows()) + not_processed = std::make_shared(ExtraBlock{std::move(remaining_block)}); + else + not_processed.reset(); + })) + { + /// Joined + } + else + throw Exception(ErrorCodes::LOGICAL_ERROR, "Wrong JOIN combination: {} {}", strictness, kind); + } +} + +HashJoin::~HashJoin() +{ + if (!data) + { + LOG_TEST(log, "{}Join data has been already released", instance_log_id); + return; + } + LOG_TEST( + log, + "{}Join data is being destroyed, {} bytes and {} rows in hash table", + instance_log_id, + getTotalByteCount(), + getTotalRowCount()); +} + +template +struct AdderNonJoined +{ + static void add(const Mapped & mapped, size_t & rows_added, MutableColumns & columns_right) + { + constexpr bool mapped_asof = std::is_same_v; + [[maybe_unused]] constexpr bool mapped_one = std::is_same_v; + + if constexpr (mapped_asof) + { + /// Do nothing + } + else if constexpr (mapped_one) + { + for (size_t j = 0; j < columns_right.size(); ++j) + { + const auto & mapped_column = mapped.block->getByPosition(j).column; + columns_right[j]->insertFrom(*mapped_column, mapped.row_num); + } + + ++rows_added; + } + else + { + for (auto it = mapped.begin(); it.ok(); ++it) + { + for (size_t j = 0; j < columns_right.size(); ++j) + { + const auto & mapped_column = it->block->getByPosition(j).column; + columns_right[j]->insertFrom(*mapped_column, it->row_num); + } + + ++rows_added; + } + } + } +}; + +/// Stream from not joined earlier rows of the right table. 
+/// Based on:
+/// - map offsetInternal saved in used_flags for single disjuncts
+/// - flags in BlockWithFlags for multiple disjuncts
+class NotJoinedHash final : public NotJoinedBlocks::RightColumnsFiller
+{
+public:
+    NotJoinedHash(const HashJoin & parent_, UInt64 max_block_size_, bool flag_per_row_)
+        : parent(parent_)
+        , max_block_size(max_block_size_)
+        , flag_per_row(flag_per_row_)
+        , current_block_start(0)
+    {
+        if (parent.data == nullptr)
+            throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot join after data has been released");
+    }
+
+    Block getEmptyBlock() override { return parent.savedBlockSample().cloneEmpty(); }
+
+    size_t fillColumns(MutableColumns & columns_right) override
+    {
+        size_t rows_added = 0;
+        if (unlikely(parent.data->type == HashJoin::Type::EMPTY))
+        {
+            rows_added = fillColumnsFromData(parent.data->blocks, columns_right);
+        }
+        else
+        {
+            auto fill_callback = [&](auto, auto, auto & map)
+            {
+                rows_added = fillColumnsFromMap(map, columns_right);
+            };
+
+            if (!joinDispatch(parent.kind, parent.strictness, parent.data->maps.front(), fill_callback))
+                throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown JOIN strictness '{}' (must be one of: ANY, ALL, ASOF)", parent.strictness);
+        }
+
+        if (!flag_per_row)
+        {
+            fillNullsFromBlocks(columns_right, rows_added);
+        }
+
+        return rows_added;
+    }
+
+private:
+    const HashJoin & parent;
+    UInt64 max_block_size;
+    bool flag_per_row;
+
+    size_t current_block_start;
+
+    std::any position;
+    std::optional<HashJoin::BlockNullmapList::const_iterator> nulls_position;
+    std::optional<BlocksList::const_iterator> used_position;
+
+    size_t fillColumnsFromData(const BlocksList & blocks, MutableColumns & columns_right)
+    {
+        if (!position.has_value())
+            position = std::make_any<BlocksList::const_iterator>(blocks.begin());
+
+        auto & block_it = std::any_cast<BlocksList::const_iterator &>(position);
+        auto end = blocks.end();
+
+        size_t rows_added = 0;
+        for (; block_it != end; ++block_it)
+        {
+            size_t rows_from_block = std::min<size_t>(max_block_size - rows_added, block_it->rows() - current_block_start);
+            for (size_t j = 0; j < columns_right.size(); ++j)
+            {
+                const auto & col = block_it->getByPosition(j).column;
+                columns_right[j]->insertRangeFrom(*col, current_block_start, rows_from_block);
+            }
+            rows_added += rows_from_block;
+
+            if (rows_added >= max_block_size)
+            {
+                /// How many rows have been read
+                current_block_start += rows_from_block;
+                if (block_it->rows() <= current_block_start)
+                {
+                    /// current block was fully read
+                    ++block_it;
+                    current_block_start = 0;
+                }
+                break;
+            }
+            current_block_start = 0;
+        }
+        return rows_added;
+    }
+
+    template <typename Maps>
+    size_t fillColumnsFromMap(const Maps & maps, MutableColumns & columns_keys_and_right)
+    {
+        switch (parent.data->type)
+        {
+        #define M(TYPE) \
+            case HashJoin::Type::TYPE: \
+                return fillColumns(*maps.TYPE, columns_keys_and_right);
+            APPLY_FOR_JOIN_VARIANTS(M)
+        #undef M
+            default:
+                throw Exception(ErrorCodes::UNSUPPORTED_JOIN_KEYS, "Unsupported JOIN keys (type: {})", parent.data->type);
+        }
+    }
+
+    template <typename Map>
+    size_t fillColumns(const Map & map, MutableColumns & columns_keys_and_right)
+    {
+        size_t rows_added = 0;
+
+        if (flag_per_row)
+        {
+            if (!used_position.has_value())
+                used_position = parent.data->blocks.begin();
+
+            auto end = parent.data->blocks.end();
+
+            for (auto & it = *used_position; it != end && rows_added < max_block_size; ++it)
+            {
+                const Block & mapped_block = *it;
+
+                for (size_t row = 0; row < mapped_block.rows(); ++row)
+                {
+                    if (!parent.isUsed(&mapped_block, row))
+                    {
+                        for (size_t colnum = 0; colnum < columns_keys_and_right.size(); ++colnum)
+                        {
columns_keys_and_right[colnum]->insertFrom(*mapped_block.getByPosition(colnum).column, row); + } + + ++rows_added; + } + } + } + } + else + { + using Mapped = typename Map::mapped_type; + using Iterator = typename Map::const_iterator; + + + if (!position.has_value()) + position = std::make_any(map.begin()); + + Iterator & it = std::any_cast(position); + auto end = map.end(); + + for (; it != end; ++it) + { + const Mapped & mapped = it->getMapped(); + + size_t offset = map.offsetInternal(it.getPtr()); + if (parent.isUsed(offset)) + continue; + AdderNonJoined::add(mapped, rows_added, columns_keys_and_right); + + if (rows_added >= max_block_size) + { + ++it; + break; + } + } + } + + return rows_added; + } + + void fillNullsFromBlocks(MutableColumns & columns_keys_and_right, size_t & rows_added) + { + if (!nulls_position.has_value()) + nulls_position = parent.data->blocks_nullmaps.begin(); + + auto end = parent.data->blocks_nullmaps.end(); + + for (auto & it = *nulls_position; it != end && rows_added < max_block_size; ++it) + { + const auto * block = it->first; + ConstNullMapPtr nullmap = nullptr; + if (it->second) + nullmap = &assert_cast(*it->second).getData(); + + for (size_t row = 0; row < block->rows(); ++row) + { + if (nullmap && (*nullmap)[row]) + { + for (size_t col = 0; col < columns_keys_and_right.size(); ++col) + columns_keys_and_right[col]->insertFrom(*block->getByPosition(col).column, row); + ++rows_added; + } + } + } + } +}; + +IBlocksStreamPtr HashJoin::getNonJoinedBlocks(const Block & left_sample_block, + const Block & result_sample_block, + UInt64 max_block_size) const +{ + if (!JoinCommon::hasNonJoinedBlocks(*table_join)) + return {}; + size_t left_columns_count = left_sample_block.columns(); + + bool flag_per_row = needUsedFlagsForPerRightTableRow(table_join); + if (!flag_per_row) + { + /// With multiple disjuncts, all keys are in sample_block_with_columns_to_add, so invariant is not held + size_t expected_columns_count = left_columns_count + required_right_keys.columns() + sample_block_with_columns_to_add.columns(); + if (expected_columns_count != result_sample_block.columns()) + { + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected number of columns in result sample block: {} instead of {} ({} + {} + {})", + result_sample_block.columns(), expected_columns_count, + left_columns_count, required_right_keys.columns(), sample_block_with_columns_to_add.columns()); + } + } + + auto non_joined = std::make_unique(*this, max_block_size, flag_per_row); + return std::make_unique(std::move(non_joined), result_sample_block, left_columns_count, *table_join); +} + +void HashJoin::reuseJoinedData(const HashJoin & join) +{ + data = join.data; + from_storage_join = true; + + bool flag_per_row = needUsedFlagsForPerRightTableRow(table_join); + if (flag_per_row) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "StorageJoin with ORs is not supported"); + + for (auto & map : data->maps) + { + joinDispatch(kind, strictness, map, [this](auto kind_, auto strictness_, auto & map_) + { + used_flags->reinit(map_.getBufferSizeInCells(data->type) + 1); + }); + } +} + +BlocksList HashJoin::releaseJoinedBlocks(bool restructure) +{ + LOG_TRACE(log, "{}Join data is being released, {} bytes and {} rows in hash table", instance_log_id, getTotalByteCount(), getTotalRowCount()); + + BlocksList right_blocks = std::move(data->blocks); + if (!restructure) + { + data.reset(); + return right_blocks; + } + + data->maps.clear(); + data->blocks_nullmaps.clear(); + + BlocksList restored_blocks; + + /// names to 
positions optimization
+    std::vector<size_t> positions;
+    std::vector<bool> is_nullable;
+    if (!right_blocks.empty())
+    {
+        positions.reserve(right_sample_block.columns());
+        const Block & tmp_block = *right_blocks.begin();
+        for (const auto & sample_column : right_sample_block)
+        {
+            positions.emplace_back(tmp_block.getPositionByName(sample_column.name));
+            is_nullable.emplace_back(isNullableOrLowCardinalityNullable(sample_column.type));
+        }
+    }
+
+    for (Block & saved_block : right_blocks)
+    {
+        Block restored_block;
+        for (size_t i = 0; i < positions.size(); ++i)
+        {
+            auto & column = saved_block.getByPosition(positions[i]);
+            correctNullabilityInplace(column, is_nullable[i]);
+            restored_block.insert(column);
+        }
+        restored_blocks.emplace_back(std::move(restored_block));
+    }
+
+    data.reset();
+    return restored_blocks;
+}
+
+const ColumnWithTypeAndName & HashJoin::rightAsofKeyColumn() const
+{
+    /// It should be nullable when right side is nullable
+    return savedBlockSample().getByName(table_join->getOnlyClause().key_names_right.back());
+}
+
+void HashJoin::validateAdditionalFilterExpression(ExpressionActionsPtr additional_filter_expression)
+{
+    if (!additional_filter_expression)
+        return;
+
+    Block expression_sample_block = additional_filter_expression->getSampleBlock();
+
+    if (expression_sample_block.columns() != 1)
+    {
+        throw Exception(ErrorCodes::LOGICAL_ERROR,
+            "Unexpected expression in JOIN ON section. Expected single column, got '{}'",
+            expression_sample_block.dumpStructure());
+    }
+
+    auto type = removeNullable(expression_sample_block.getByPosition(0).type);
+    if (!type->equals(*std::make_shared<DataTypeUInt8>()))
+    {
+        throw Exception(ErrorCodes::LOGICAL_ERROR,
+            "Unexpected expression in JOIN ON section. Expected boolean (UInt8), got '{}'. expression:\n{}",
+            expression_sample_block.getByPosition(0).type->getName(),
+            additional_filter_expression->dumpActions());
+    }
+
+    bool is_supported = (strictness == JoinStrictness::All) && (isInnerOrLeft(kind) || isRightOrFull(kind));
+    if (!is_supported)
+    {
+        throw Exception(ErrorCodes::INVALID_JOIN_ON_EXPRESSION,
+            "Non equi condition '{}' from JOIN ON section is supported only for ALL INNER/LEFT/FULL/RIGHT JOINs",
+            expression_sample_block.getByPosition(0).name);
+    }
+}
+
+bool HashJoin::isUsed(size_t off) const
+{
+    return used_flags->getUsedSafe(off);
+}
+
+bool HashJoin::isUsed(const Block * block_ptr, size_t row_idx) const
+{
+    return used_flags->getUsedSafe(block_ptr, row_idx);
+}
+
+
+bool HashJoin::needUsedFlagsForPerRightTableRow(std::shared_ptr<TableJoin> table_join_) const
+{
+    if (!table_join_->oneDisjunct())
+        return true;
+    /// If it's an ALL RIGHT JOIN with inequality conditions, we need to mark each row
+    if (table_join_->getMixedJoinExpression() && isRightOrFull(table_join_->kind()))
+        return true;
+    return false;
+}
+
+}
diff --git a/src/Interpreters/HashJoin.h b/src/Interpreters/HashJoin/HashJoin.h
similarity index 91%
rename from src/Interpreters/HashJoin.h
rename to src/Interpreters/HashJoin/HashJoin.h
index 56a1768a7ff..0b115b9fdbb 100644
--- a/src/Interpreters/HashJoin.h
+++ b/src/Interpreters/HashJoin/HashJoin.h
@@ -1,5 +1,6 @@
 #pragma once
+#include
 #include
 #include
 #include
@@ -36,47 +37,13 @@ class ExpressionActions;
 
 namespace JoinStuff
 {
-/// Flags needed to implement RIGHT and FULL JOINs.
-class JoinUsedFlags -{ - using RawBlockPtr = const Block *; - using UsedFlagsForBlock = std::vector; - - /// For multiple dijuncts each empty in hashmap stores flags for particular block - /// For single dicunct we store all flags in `nullptr` entry, index is the offset in FindResult - std::unordered_map flags; - - bool need_flags; - -public: - /// Update size for vector with flags. - /// Calling this method invalidates existing flags. - /// It can be called several times, but all of them should happen before using this structure. - template - void reinit(size_t size_); - - template - void reinit(const Block * block_ptr); - - bool getUsedSafe(size_t i) const; - bool getUsedSafe(const Block * block_ptr, size_t row_idx) const; - - template - void setUsed(const T & f); - - template - void setUsed(const Block * block, size_t row_num, size_t offset); - - template - bool getUsed(const T & f); - - template - bool setUsedOnce(const T & f); -}; - +class JoinUsedFlags; } +template +class HashJoinMethods; + /** Data structure for implementation of JOIN. * It is just a hash table: keys -> rows of joined ("right") table. * Additionally, CROSS JOIN is supported: instead of hash table, it use just set of blocks without keys. @@ -400,8 +367,8 @@ public: const Block & savedBlockSample() const { return data->sample_block; } - bool isUsed(size_t off) const { return used_flags.getUsedSafe(off); } - bool isUsed(const Block * block_ptr, size_t row_idx) const { return used_flags.getUsedSafe(block_ptr, row_idx); } + bool isUsed(size_t off) const; + bool isUsed(const Block * block_ptr, size_t row_idx) const; void debugKeys() const; @@ -414,6 +381,9 @@ private: friend class JoinSource; + template + friend class HashJoinMethods; + std::shared_ptr table_join; const JoinKind kind; const JoinStrictness strictness; @@ -433,8 +403,7 @@ private: /// Number of this flags equals to hashtable buffer size (plus one for zero value). /// Changes in hash table broke correspondence, /// so we must guarantee constantness of hash table during HashJoin lifetime (using method setLock) - mutable JoinStuff::JoinUsedFlags used_flags; - + mutable std::unique_ptr used_flags; RightTableDataPtr data; bool have_compressed = false; @@ -476,13 +445,6 @@ private: void initRightBlockStructure(Block & saved_block_sample); - template - Block joinBlockImpl( - Block & block, - const Block & block_with_columns_to_add, - const std::vector & maps_, - bool is_join_get = false) const; - void joinBlockImplCross(Block & block, ExtraBlockPtr & not_processed) const; static Type chooseMethod(JoinKind kind, const ColumnRawPtrs & key_columns, Sizes & key_sizes); diff --git a/src/Interpreters/HashJoin/HashJoinMethods.h b/src/Interpreters/HashJoin/HashJoinMethods.h new file mode 100644 index 00000000000..b1a497c1c1c --- /dev/null +++ b/src/Interpreters/HashJoin/HashJoinMethods.h @@ -0,0 +1,956 @@ +#pragma once +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + + +namespace DB +{ +namespace ErrorCodes +{ + extern const int UNSUPPORTED_JOIN_KEYS; + extern const int LOGICAL_ERROR; +} + +/// Inserting an element into a hash table of the form `key -> reference to a string`, which will then be used by JOIN. 
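// For orientation: the mapped value is a single RowRef (stored block pointer + row number)
// for ANY-style joins (MapsOne), a RowRefList grown in the arena for ALL joins (MapsAll),
// and a sorted per-key lookup structure for ASOF. insertOne below overwrites an existing
// entry only when anyTakeLastRow() is set.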
+template +struct Inserter +{ + static ALWAYS_INLINE bool + insertOne(const HashJoin & join, HashMap & map, KeyGetter & key_getter, Block * stored_block, size_t i, Arena & pool) + { + auto emplace_result = key_getter.emplaceKey(map, i, pool); + + if (emplace_result.isInserted() || join.anyTakeLastRow()) + { + new (&emplace_result.getMapped()) typename HashMap::mapped_type(stored_block, i); + return true; + } + return false; + } + + static ALWAYS_INLINE void insertAll(const HashJoin &, HashMap & map, KeyGetter & key_getter, Block * stored_block, size_t i, Arena & pool) + { + auto emplace_result = key_getter.emplaceKey(map, i, pool); + + if (emplace_result.isInserted()) + new (&emplace_result.getMapped()) typename HashMap::mapped_type(stored_block, i); + else + { + /// The first element of the list is stored in the value of the hash table, the rest in the pool. + emplace_result.getMapped().insert({stored_block, i}, pool); + } + } + + static ALWAYS_INLINE void insertAsof( + HashJoin & join, HashMap & map, KeyGetter & key_getter, Block * stored_block, size_t i, Arena & pool, const IColumn & asof_column) + { + auto emplace_result = key_getter.emplaceKey(map, i, pool); + typename HashMap::mapped_type * time_series_map = &emplace_result.getMapped(); + + TypeIndex asof_type = *join.getAsofType(); + if (emplace_result.isInserted()) + time_series_map = new (time_series_map) typename HashMap::mapped_type(createAsofRowRef(asof_type, join.getAsofInequality())); + (*time_series_map)->insert(asof_column, stored_block, i); + } +}; + + +/// MapsTemplate is one of MapsOne, MapsAll and MapsAsof +template +class HashJoinMethods +{ +public: + static size_t insertFromBlockImpl( + HashJoin & join, + HashJoin::Type type, + MapsTemplate & maps, + size_t rows, + const ColumnRawPtrs & key_columns, + const Sizes & key_sizes, + Block * stored_block, + ConstNullMapPtr null_map, + UInt8ColumnDataPtr join_mask, + Arena & pool, + bool & is_inserted) + { + switch (type) + { + case HashJoin::Type::EMPTY: + [[fallthrough]]; + case HashJoin::Type::CROSS: + /// Do nothing. We will only save block, and it is enough + is_inserted = true; + return 0; + + #define M(TYPE) \ + case HashJoin::Type::TYPE: \ + return insertFromBlockImplTypeCase>::Type>(\ + join, *maps.TYPE, rows, key_columns, key_sizes, stored_block, null_map, join_mask, pool, is_inserted); \ + break; + + APPLY_FOR_JOIN_VARIANTS(M) + #undef M + } + } + + using MapsTemplateVector = std::vector; + + static Block joinBlockImpl( + const HashJoin & join, + Block & block, + const Block & block_with_columns_to_add, + const MapsTemplateVector & maps_, + bool is_join_get = false) + { + constexpr JoinFeatures join_features; + + std::vector join_on_keys; + const auto & onexprs = join.table_join->getClauses(); + for (size_t i = 0; i < onexprs.size(); ++i) + { + const auto & key_names = !is_join_get ? onexprs[i].key_names_left : onexprs[i].key_names_right; + join_on_keys.emplace_back(block, key_names, onexprs[i].condColumnNames().first, join.key_sizes[i]); + } + size_t existing_columns = block.columns(); + + /** If you use FULL or RIGHT JOIN, then the columns from the "left" table must be materialized. + * Because if they are constants, then in the "not joined" rows, they may have different values + * - default values, which can differ from the values of these constants. + */ + if constexpr (join_features.right || join_features.full) + { + materializeBlockInplace(block); + } + + /** For LEFT/INNER JOIN, the saved blocks do not contain keys. 
+ * For FULL/RIGHT JOIN, the saved blocks contain keys; + * but they will not be used at this stage of joining (and will be in `AdderNonJoined`), and they need to be skipped. + * For ASOF, the last column is used as the ASOF column + */ + AddedColumns added_columns( + block, + block_with_columns_to_add, + join.savedBlockSample(), + join, + std::move(join_on_keys), + join.table_join->getMixedJoinExpression(), + join_features.is_asof_join, + is_join_get); + + bool has_required_right_keys = (join.required_right_keys.columns() != 0); + added_columns.need_filter = join_features.need_filter || has_required_right_keys; + added_columns.max_joined_block_rows = join.max_joined_block_rows; + if (!added_columns.max_joined_block_rows) + added_columns.max_joined_block_rows = std::numeric_limits::max(); + else + added_columns.reserve(join_features.need_replication); + + size_t num_joined = switchJoinRightColumns(maps_, added_columns, join.data->type, *join.used_flags); + /// Do not hold memory for join_on_keys anymore + added_columns.join_on_keys.clear(); + Block remaining_block = sliceBlock(block, num_joined); + + added_columns.buildOutput(); + for (size_t i = 0; i < added_columns.size(); ++i) + block.insert(added_columns.moveColumn(i)); + + std::vector right_keys_to_replicate [[maybe_unused]]; + + if constexpr (join_features.need_filter) + { + /// If ANY INNER | RIGHT JOIN - filter all the columns except the new ones. + for (size_t i = 0; i < existing_columns; ++i) + block.safeGetByPosition(i).column = block.safeGetByPosition(i).column->filter(added_columns.filter, -1); + + /// Add join key columns from right block if needed using value from left table because of equality + for (size_t i = 0; i < join.required_right_keys.columns(); ++i) + { + const auto & right_key = join.required_right_keys.getByPosition(i); + /// asof column is already in block. + if (join_features.is_asof_join && right_key.name == join.table_join->getOnlyClause().key_names_right.back()) + continue; + + const auto & left_column = block.getByName(join.required_right_keys_sources[i]); + const auto & right_col_name = join.getTableJoin().renamedRightColumnName(right_key.name); + auto right_col = copyLeftKeyColumnToRight(right_key.type, right_col_name, left_column); + block.insert(std::move(right_col)); + } + } + else if (has_required_right_keys) + { + /// Add join key columns from right block if needed. + for (size_t i = 0; i < join.required_right_keys.columns(); ++i) + { + const auto & right_key = join.required_right_keys.getByPosition(i); + auto right_col_name = join.getTableJoin().renamedRightColumnName(right_key.name); + /// asof column is already in block. + if (join_features.is_asof_join && right_key.name == join.table_join->getOnlyClause().key_names_right.back()) + continue; + + const auto & left_column = block.getByName(join.required_right_keys_sources[i]); + auto right_col = copyLeftKeyColumnToRight(right_key.type, right_col_name, left_column, &added_columns.filter); + block.insert(std::move(right_col)); + + if constexpr (join_features.need_replication) + right_keys_to_replicate.push_back(block.getPositionByName(right_col_name)); + } + } + + if constexpr (join_features.need_replication) + { + std::unique_ptr & offsets_to_replicate = added_columns.offsets_to_replicate; + + /// If ALL ... JOIN - we replicate all the columns except the new ones. 
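// offsets_to_replicate holds cumulative match counts, which is what IColumn::replicate
// expects: for a hypothetical 3-row block, offsets [1, 3, 3] repeat left row 0 once,
// row 1 twice, and row 2 zero times.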
+ for (size_t i = 0; i < existing_columns; ++i) + { + block.safeGetByPosition(i).column = block.safeGetByPosition(i).column->replicate(*offsets_to_replicate); + } + + /// Replicate additional right keys + for (size_t pos : right_keys_to_replicate) + { + block.safeGetByPosition(pos).column = block.safeGetByPosition(pos).column->replicate(*offsets_to_replicate); + } + } + + return remaining_block; + } + +private: + template + static KeyGetter createKeyGetter(const ColumnRawPtrs & key_columns, const Sizes & key_sizes) + { + if constexpr (is_asof_join) + { + auto key_column_copy = key_columns; + auto key_size_copy = key_sizes; + key_column_copy.pop_back(); + key_size_copy.pop_back(); + return KeyGetter(key_column_copy, key_size_copy, nullptr); + } + else + return KeyGetter(key_columns, key_sizes, nullptr); + } + + template + static size_t NO_INLINE insertFromBlockImplTypeCase( + HashJoin & join, HashMap & map, size_t rows, const ColumnRawPtrs & key_columns, + const Sizes & key_sizes, Block * stored_block, ConstNullMapPtr null_map, UInt8ColumnDataPtr join_mask, Arena & pool, bool & is_inserted) + { + [[maybe_unused]] constexpr bool mapped_one = std::is_same_v; + constexpr bool is_asof_join = STRICTNESS == JoinStrictness::Asof; + + const IColumn * asof_column [[maybe_unused]] = nullptr; + if constexpr (is_asof_join) + asof_column = key_columns.back(); + + auto key_getter = createKeyGetter(key_columns, key_sizes); + + /// For ALL and ASOF join always insert values + is_inserted = !mapped_one || is_asof_join; + + for (size_t i = 0; i < rows; ++i) + { + if (null_map && (*null_map)[i]) + { + /// nulls are not inserted into hash table, + /// keep them for RIGHT and FULL joins + is_inserted = true; + continue; + } + + /// Check condition for right table from ON section + if (join_mask && !(*join_mask)[i]) + continue; + + if constexpr (is_asof_join) + Inserter::insertAsof(join, map, key_getter, stored_block, i, pool, *asof_column); + else if constexpr (mapped_one) + is_inserted |= Inserter::insertOne(join, map, key_getter, stored_block, i, pool); + else + Inserter::insertAll(join, map, key_getter, stored_block, i, pool); + } + return map.getBufferSizeInCells(); + } + + template + static size_t switchJoinRightColumns( + const std::vector & mapv, + AddedColumns & added_columns, + HashJoin::Type type, + JoinStuff::JoinUsedFlags & used_flags) + { + constexpr bool is_asof_join = STRICTNESS == JoinStrictness::Asof; + switch (type) + { + case HashJoin::Type::EMPTY: { + if constexpr (!is_asof_join) + { + using KeyGetter = KeyGetterEmpty; + std::vector key_getter_vector; + key_getter_vector.emplace_back(); + + using MapTypeVal = typename KeyGetter::MappedType; + std::vector a_map_type_vector; + a_map_type_vector.emplace_back(); + return joinRightColumnsSwitchNullability( + std::move(key_getter_vector), a_map_type_vector, added_columns, used_flags); + } + throw Exception(ErrorCodes::UNSUPPORTED_JOIN_KEYS, "Unsupported JOIN keys. 
Type: {}", type); + } + #define M(TYPE) \ + case HashJoin::Type::TYPE: \ + { \ + using MapTypeVal = const typename std::remove_reference_t::element_type; \ + using KeyGetter = typename KeyGetterForType::Type; \ + std::vector a_map_type_vector(mapv.size()); \ + std::vector key_getter_vector; \ + for (size_t d = 0; d < added_columns.join_on_keys.size(); ++d) \ + { \ + const auto & join_on_key = added_columns.join_on_keys[d]; \ + a_map_type_vector[d] = mapv[d]->TYPE.get(); \ + key_getter_vector.push_back(std::move(createKeyGetter(join_on_key.key_columns, join_on_key.key_sizes))); \ + } \ + return joinRightColumnsSwitchNullability( \ + std::move(key_getter_vector), a_map_type_vector, added_columns, used_flags); \ + } + APPLY_FOR_JOIN_VARIANTS(M) + #undef M + + default: + throw Exception(ErrorCodes::UNSUPPORTED_JOIN_KEYS, "Unsupported JOIN keys (type: {})", type); + } + } + + template + static size_t joinRightColumnsSwitchNullability( + std::vector && key_getter_vector, + const std::vector & mapv, + AddedColumns & added_columns, + JoinStuff::JoinUsedFlags & used_flags) + { + if (added_columns.need_filter) + { + return joinRightColumnsSwitchMultipleDisjuncts( + std::forward>(key_getter_vector), mapv, added_columns, used_flags); + } + else + { + return joinRightColumnsSwitchMultipleDisjuncts( + std::forward>(key_getter_vector), mapv, added_columns, used_flags); + } + } + + template + static size_t joinRightColumnsSwitchMultipleDisjuncts( + std::vector && key_getter_vector, + const std::vector & mapv, + AddedColumns & added_columns, + JoinStuff::JoinUsedFlags & used_flags) + { + constexpr JoinFeatures join_features; + if constexpr (join_features.is_all_join) + { + if (added_columns.additional_filter_expression) + { + bool mark_per_row_used = join_features.right || join_features.full || mapv.size() > 1; + return joinRightColumnsWithAddtitionalFilter( + std::forward>(key_getter_vector), + mapv, + added_columns, + used_flags, + need_filter, + join_features.need_flags, + join_features.add_missing, + mark_per_row_used); + } + } + + if (added_columns.additional_filter_expression) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Additional filter expression is not supported for this JOIN"); + + return mapv.size() > 1 ? joinRightColumns( + std::forward>(key_getter_vector), mapv, added_columns, used_flags) + : joinRightColumns( + std::forward>(key_getter_vector), mapv, added_columns, used_flags); + } + + /// Joins right table columns which indexes are present in right_indexes using specified map. + /// Makes filter (1 if row presented in right table) and returns offsets to replicate (for ALL JOINS). 
+ template + static size_t joinRightColumns( + std::vector && key_getter_vector, + const std::vector & mapv, + AddedColumns & added_columns, + JoinStuff::JoinUsedFlags & used_flags) + { + constexpr JoinFeatures join_features; + + size_t rows = added_columns.rows_to_add; + if constexpr (need_filter) + added_columns.filter = IColumn::Filter(rows, 0); + + Arena pool; + + if constexpr (join_features.need_replication) + added_columns.offsets_to_replicate = std::make_unique(rows); + + IColumn::Offset current_offset = 0; + size_t max_joined_block_rows = added_columns.max_joined_block_rows; + size_t i = 0; + for (; i < rows; ++i) + { + if constexpr (join_features.need_replication) + { + if (unlikely(current_offset >= max_joined_block_rows)) + { + added_columns.offsets_to_replicate->resize_assume_reserved(i); + added_columns.filter.resize_assume_reserved(i); + break; + } + } + + bool right_row_found = false; + + KnownRowsHolder known_rows; + for (size_t onexpr_idx = 0; onexpr_idx < added_columns.join_on_keys.size(); ++onexpr_idx) + { + const auto & join_keys = added_columns.join_on_keys[onexpr_idx]; + if (join_keys.null_map && (*join_keys.null_map)[i]) + continue; + + bool row_acceptable = !join_keys.isRowFiltered(i); + using FindResult = typename KeyGetter::FindResult; + auto find_result = row_acceptable ? key_getter_vector[onexpr_idx].findKey(*(mapv[onexpr_idx]), i, pool) : FindResult(); + + if (find_result.isFound()) + { + right_row_found = true; + auto & mapped = find_result.getMapped(); + if constexpr (join_features.is_asof_join) + { + const IColumn & left_asof_key = added_columns.leftAsofKey(); + + auto row_ref = mapped->findAsof(left_asof_key, i); + if (row_ref.block) + { + setUsed(added_columns.filter, i); + if constexpr (flag_per_row) + used_flags.template setUsed(row_ref.block, row_ref.row_num, 0); + else + used_flags.template setUsed(find_result); + + added_columns.appendFromBlock(*row_ref.block, row_ref.row_num, join_features.add_missing); + } + else + addNotFoundRow(added_columns, current_offset); + } + else if constexpr (join_features.is_all_join) + { + setUsed(added_columns.filter, i); + used_flags.template setUsed(find_result); + auto used_flags_opt = join_features.need_flags ? &used_flags : nullptr; + addFoundRowAll(mapped, added_columns, current_offset, known_rows, used_flags_opt); + } + else if constexpr ((join_features.is_any_join || join_features.is_semi_join) && join_features.right) + { + /// Use first appeared left key + it needs left columns replication + bool used_once = used_flags.template setUsedOnce(find_result); + if (used_once) + { + auto used_flags_opt = join_features.need_flags ? 
&used_flags : nullptr; + setUsed(added_columns.filter, i); + addFoundRowAll( + mapped, added_columns, current_offset, known_rows, used_flags_opt); + } + } + else if constexpr (join_features.is_any_join && KIND == JoinKind::Inner) + { + bool used_once = used_flags.template setUsedOnce(find_result); + + /// Use first appeared left key only + if (used_once) + { + setUsed(added_columns.filter, i); + added_columns.appendFromBlock(*mapped.block, mapped.row_num, join_features.add_missing); + } + + break; + } + else if constexpr (join_features.is_any_join && join_features.full) + { + /// TODO + } + else if constexpr (join_features.is_anti_join) + { + if constexpr (join_features.right && join_features.need_flags) + used_flags.template setUsed(find_result); + } + else /// ANY LEFT, SEMI LEFT, old ANY (RightAny) + { + setUsed(added_columns.filter, i); + used_flags.template setUsed(find_result); + added_columns.appendFromBlock(*mapped.block, mapped.row_num, join_features.add_missing); + + if (join_features.is_any_or_semi_join) + { + break; + } + } + } + } + + if (!right_row_found) + { + if constexpr (join_features.is_anti_join && join_features.left) + setUsed(added_columns.filter, i); + addNotFoundRow(added_columns, current_offset); + } + + if constexpr (join_features.need_replication) + { + (*added_columns.offsets_to_replicate)[i] = current_offset; + } + } + + added_columns.applyLazyDefaults(); + return i; + } + + template + static void setUsed(IColumn::Filter & filter [[maybe_unused]], size_t pos [[maybe_unused]]) + { + if constexpr (need_filter) + filter[pos] = 1; + } + + template + static ColumnPtr buildAdditionalFilter( + size_t left_start_row, + const std::vector & selected_rows, + const std::vector & row_replicate_offset, + AddedColumns & added_columns) + { + ColumnPtr result_column; + do + { + if (selected_rows.empty()) + { + result_column = ColumnUInt8::create(); + break; + } + const Block & sample_right_block = *selected_rows.begin()->block; + if (!sample_right_block || !added_columns.additional_filter_expression) + { + auto filter = ColumnUInt8::create(); + filter->insertMany(1, selected_rows.size()); + result_column = std::move(filter); + break; + } + + auto required_cols = added_columns.additional_filter_expression->getRequiredColumnsWithTypes(); + if (required_cols.empty()) + { + Block block; + added_columns.additional_filter_expression->execute(block); + result_column = block.getByPosition(0).column->cloneResized(selected_rows.size()); + break; + } + NameSet required_column_names; + for (auto & col : required_cols) + required_column_names.insert(col.name); + + Block executed_block; + size_t right_col_pos = 0; + for (const auto & col : sample_right_block.getColumnsWithTypeAndName()) + { + if (required_column_names.contains(col.name)) + { + auto new_col = col.column->cloneEmpty(); + for (const auto & selected_row : selected_rows) + { + const auto & src_col = selected_row.block->getByPosition(right_col_pos); + new_col->insertFrom(*src_col.column, selected_row.row_num); + } + executed_block.insert({std::move(new_col), col.type, col.name}); + } + right_col_pos += 1; + } + if (!executed_block) + { + result_column = ColumnUInt8::create(); + break; + } + + for (const auto & col_name : required_column_names) + { + const auto * src_col = added_columns.left_block.findByName(col_name); + if (!src_col) + continue; + auto new_col = src_col->column->cloneEmpty(); + size_t prev_left_offset = 0; + for (size_t i = 1; i < row_replicate_offset.size(); ++i) + { + const size_t & left_offset = 
row_replicate_offset[i];
+                    size_t rows = left_offset - prev_left_offset;
+                    if (rows)
+                        new_col->insertManyFrom(*src_col->column, left_start_row + i - 1, rows);
+                    prev_left_offset = left_offset;
+                }
+                executed_block.insert({std::move(new_col), src_col->type, col_name});
+            }
+            if (!executed_block)
+            {
+                throw Exception(
+                    ErrorCodes::LOGICAL_ERROR,
+                    "Required columns: [{}], but none of them were found in the left or right table. Right table: {}, left table: {}",
+                    required_cols.toString(),
+                    sample_right_block.dumpNames(),
+                    added_columns.left_block.dumpNames());
+            }
+
+            for (const auto & col : executed_block.getColumnsWithTypeAndName())
+                if (!col.column || !col.type)
+                    throw Exception(ErrorCodes::LOGICAL_ERROR, "Illegal nullptr column in input block: {}", executed_block.dumpStructure());
+
+            added_columns.additional_filter_expression->execute(executed_block);
+            result_column = executed_block.getByPosition(0).column->convertToFullColumnIfConst();
+            executed_block.clear();
+        } while (false);
+
+        result_column = result_column->convertToFullIfNeeded();
+        if (result_column->isNullable())
+        {
+            /// Convert Nullable(UInt8) to UInt8, ensuring that nulls become zeros.
+            /// Try to avoid copying data, since we are the only owner of the column.
+            ColumnPtr mask_column = assert_cast<const ColumnNullable &>(*result_column).getNullMapColumnPtr();
+
+            MutableColumnPtr mutable_column;
+            {
+                ColumnPtr nested_column = assert_cast<const ColumnNullable &>(*result_column).getNestedColumnPtr();
+                result_column.reset();
+                mutable_column = IColumn::mutate(std::move(nested_column));
+            }
+
+            auto & column_data = assert_cast<ColumnUInt8 &>(*mutable_column).getData();
+            const auto & mask_column_data = assert_cast<const ColumnUInt8 &>(*mask_column).getData();
+            for (size_t i = 0; i < column_data.size(); ++i)
+            {
+                if (mask_column_data[i])
+                    column_data[i] = 0;
+            }
+            return mutable_column;
+        }
+        return result_column;
+    }
+
+    /// First collect all row refs matched by the join keys, then filter out the rows for which the additional filter expression does not hold.
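+    /// The batch layout, by way of example (an illustration, not an exact trace): for left rows
+    /// [s, s + n) the row_replicate_offset array is a prefix sum over collected candidates,
+    /// e.g. {0, 2, 2, 5} means left row s has candidates selected_rows[0..2), row s + 1 has none,
+    /// and row s + 2 has selected_rows[2..5). buildAdditionalFilter() then evaluates the non-equi
+    /// part of the ON clause (e.g. `t1.a < t2.b` in `ON t1.key = t2.key AND t1.a < t2.b`) once per
+    /// candidate, and only candidates whose filter value is 1 survive into the final copy.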
+ template + static size_t joinRightColumnsWithAddtitionalFilter( + std::vector && key_getter_vector, + const std::vector & mapv, + AddedColumns & added_columns, + JoinStuff::JoinUsedFlags & used_flags [[maybe_unused]], + bool need_filter [[maybe_unused]], + bool need_flags [[maybe_unused]], + bool add_missing [[maybe_unused]], + bool flag_per_row [[maybe_unused]]) + { + size_t left_block_rows = added_columns.rows_to_add; + if (need_filter) + added_columns.filter = IColumn::Filter(left_block_rows, 0); + + std::unique_ptr pool; + + if constexpr (need_replication) + added_columns.offsets_to_replicate = std::make_unique(left_block_rows); + + std::vector row_replicate_offset; + row_replicate_offset.reserve(left_block_rows); + + using FindResult = typename KeyGetter::FindResult; + size_t max_joined_block_rows = added_columns.max_joined_block_rows; + size_t left_row_iter = 0; + PreSelectedRows selected_rows; + selected_rows.reserve(left_block_rows); + std::vector find_results; + find_results.reserve(left_block_rows); + bool exceeded_max_block_rows = false; + IColumn::Offset total_added_rows = 0; + IColumn::Offset current_added_rows = 0; + + auto collect_keys_matched_rows_refs = [&]() + { + pool = std::make_unique(); + find_results.clear(); + row_replicate_offset.clear(); + row_replicate_offset.push_back(0); + current_added_rows = 0; + selected_rows.clear(); + for (; left_row_iter < left_block_rows; ++left_row_iter) + { + if constexpr (need_replication) + { + if (unlikely(total_added_rows + current_added_rows >= max_joined_block_rows)) + { + break; + } + } + KnownRowsHolder all_flag_known_rows; + KnownRowsHolder single_flag_know_rows; + for (size_t join_clause_idx = 0; join_clause_idx < added_columns.join_on_keys.size(); ++join_clause_idx) + { + const auto & join_keys = added_columns.join_on_keys[join_clause_idx]; + if (join_keys.null_map && (*join_keys.null_map)[left_row_iter]) + continue; + + bool row_acceptable = !join_keys.isRowFiltered(left_row_iter); + auto find_result = row_acceptable + ? key_getter_vector[join_clause_idx].findKey(*(mapv[join_clause_idx]), left_row_iter, *pool) + : FindResult(); + + if (find_result.isFound()) + { + auto & mapped = find_result.getMapped(); + find_results.push_back(find_result); + if (flag_per_row) + addFoundRowAll(mapped, selected_rows, current_added_rows, all_flag_known_rows, nullptr); + else + addFoundRowAll(mapped, selected_rows, current_added_rows, single_flag_know_rows, nullptr); + } + } + row_replicate_offset.push_back(current_added_rows); + } + }; + + auto copy_final_matched_rows = [&](size_t left_start_row, ColumnPtr filter_col) + { + const PaddedPODArray & filter_flags = assert_cast(*filter_col).getData(); + + size_t prev_replicated_row = 0; + auto selected_right_row_it = selected_rows.begin(); + size_t find_result_index = 0; + for (size_t i = 1, n = row_replicate_offset.size(); i < n; ++i) + { + bool any_matched = false; + /// For all right join, flag_per_row is true, we need mark used flags for each row. 
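+            /// The distinction, in short (see JoinUsedFlags; template flags are omitted in this sketch):
+            ///     used_flags.setUsed(block_ptr, row_num, 0);   /// flag_per_row: keyed by (right block, row)
+            ///     used_flags.setUsed(find_result);             /// otherwise: keyed by hash-table offset
+            /// Per-row keying is required when one hash-table entry can refer to several stored
+            /// rows, as with RIGHT/FULL joins or multiple disjuncts.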
+ if (flag_per_row) + { + for (size_t replicated_row = prev_replicated_row; replicated_row < row_replicate_offset[i]; ++replicated_row) + { + if (filter_flags[replicated_row]) + { + any_matched = true; + added_columns.appendFromBlock(*selected_right_row_it->block, selected_right_row_it->row_num, add_missing); + total_added_rows += 1; + if (need_flags) + used_flags.template setUsed(selected_right_row_it->block, selected_right_row_it->row_num, 0); + } + ++selected_right_row_it; + } + } + else + { + for (size_t replicated_row = prev_replicated_row; replicated_row < row_replicate_offset[i]; ++replicated_row) + { + if (filter_flags[replicated_row]) + { + any_matched = true; + added_columns.appendFromBlock(*selected_right_row_it->block, selected_right_row_it->row_num, add_missing); + total_added_rows += 1; + } + ++selected_right_row_it; + } + } + if (!any_matched) + { + if (add_missing) + addNotFoundRow(added_columns, total_added_rows); + else + addNotFoundRow(added_columns, total_added_rows); + } + else + { + if (!flag_per_row && need_flags) + used_flags.template setUsed(find_results[find_result_index]); + if (need_filter) + setUsed(added_columns.filter, left_start_row + i - 1); + if (add_missing) + added_columns.applyLazyDefaults(); + } + find_result_index += (prev_replicated_row != row_replicate_offset[i]); + + if constexpr (need_replication) + { + (*added_columns.offsets_to_replicate)[left_start_row + i - 1] = total_added_rows; + } + prev_replicated_row = row_replicate_offset[i]; + } + }; + + while (left_row_iter < left_block_rows && !exceeded_max_block_rows) + { + auto left_start_row = left_row_iter; + collect_keys_matched_rows_refs(); + if (selected_rows.size() != current_added_rows || row_replicate_offset.size() != left_row_iter - left_start_row + 1) + { + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "Sizes are mismatched. selected_rows.size:{}, current_added_rows:{}, row_replicate_offset.size:{}, left_row_iter: {}, " + "left_start_row: {}", + selected_rows.size(), + current_added_rows, + row_replicate_offset.size(), + left_row_iter, + left_start_row); + } + auto filter_col = buildAdditionalFilter(left_start_row, selected_rows, row_replicate_offset, added_columns); + copy_final_matched_rows(left_start_row, filter_col); + + if constexpr (need_replication) + { + // Add a check for current_added_rows to avoid run the filter expression on too small size batch. + if (total_added_rows >= max_joined_block_rows || current_added_rows < 1024) + { + exceeded_max_block_rows = true; + } + } + } + + if constexpr (need_replication) + { + added_columns.offsets_to_replicate->resize_assume_reserved(left_row_iter); + added_columns.filter.resize_assume_reserved(left_row_iter); + } + added_columns.applyLazyDefaults(); + return left_row_iter; + } + + /// Cut first num_rows rows from block in place and returns block with remaining rows + static Block sliceBlock(Block & block, size_t num_rows) + { + size_t total_rows = block.rows(); + if (num_rows >= total_rows) + return {}; + size_t remaining_rows = total_rows - num_rows; + Block remaining_block = block.cloneEmpty(); + for (size_t i = 0; i < block.columns(); ++i) + { + auto & col = block.getByPosition(i); + remaining_block.getByPosition(i).column = col.column->cut(num_rows, remaining_rows); + col.column = col.column->cut(0, num_rows); + } + return remaining_block; + } + + /** Since we do not store right key columns, + * this function is used to copy left key columns to right key columns. 
+     * If the user requests some right columns, we just copy the left key columns to the right, since they are equal.
+     * Example: SELECT t1.key, t2.key FROM t1 FULL JOIN t2 ON t1.key = t2.key;
+     * In that case, for matched rows, t2.key is filled with the values from t1.key.
+     * However, in some cases we might need to adjust the type of the column, e.g. t1.key :: LowCardinality(String) vs. t2.key :: String.
+     * Also, the nullability of the column might be different.
+     * Returns the right column with the necessary adjustments applied.
+     */
+    static ColumnWithTypeAndName copyLeftKeyColumnToRight(
+        const DataTypePtr & right_key_type,
+        const String & renamed_right_column,
+        const ColumnWithTypeAndName & left_column,
+        const IColumn::Filter * null_map_filter = nullptr)
+    {
+        ColumnWithTypeAndName right_column = left_column;
+        right_column.name = renamed_right_column;
+
+        if (null_map_filter)
+            right_column.column = JoinCommon::filterWithBlanks(right_column.column, *null_map_filter);
+
+        bool should_be_nullable = isNullableOrLowCardinalityNullable(right_key_type);
+        if (null_map_filter)
+            correctNullabilityInplace(right_column, should_be_nullable, *null_map_filter);
+        else
+            correctNullabilityInplace(right_column, should_be_nullable);
+
+        if (!right_column.type->equals(*right_key_type))
+        {
+            right_column.column = castColumnAccurate(right_column, right_key_type);
+            right_column.type = right_key_type;
+        }
+
+        right_column.column = right_column.column->convertToFullColumnIfConst();
+        return right_column;
+    }
+
+    static void correctNullabilityInplace(ColumnWithTypeAndName & column, bool nullable)
+    {
+        if (nullable)
+        {
+            JoinCommon::convertColumnToNullable(column);
+        }
+        else
+        {
+            /// We have to replace values masked by NULLs with defaults.
+            if (column.column)
+                if (const auto * nullable_column = checkAndGetColumn<ColumnNullable>(&*column.column))
+                    column.column = JoinCommon::filterWithBlanks(column.column, nullable_column->getNullMapColumn().getData(), true);
+
+            JoinCommon::removeColumnNullability(column);
+        }
+    }
+
+    static void correctNullabilityInplace(ColumnWithTypeAndName & column, bool nullable, const IColumn::Filter & negative_null_map)
+    {
+        if (nullable)
+        {
+            JoinCommon::convertColumnToNullable(column);
+            if (column.type->isNullable() && !negative_null_map.empty())
+            {
+                MutableColumnPtr mutable_column = IColumn::mutate(std::move(column.column));
+                assert_cast<ColumnNullable &>(*mutable_column).applyNegatedNullMap(negative_null_map);
+                column.column = std::move(mutable_column);
+            }
+        }
+        else
+            JoinCommon::removeColumnNullability(column);
+    }
+};
+
+/// Instantiate the template class ahead of time in separate .cpp files to avoid a `too large translation unit` error.
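+/// A general C++ note on the mechanism (not specific to this project): an `extern template`
+/// declaration promises that the instantiation is provided by some other translation unit,
+/// so translation units that include this header do not re-instantiate the heavy templates.
+///
+///     // widget.h
+///     extern template class Widget<int>;  // declaration only, no code generated here
+///     // widget.cpp
+///     template class Widget<int>;         // the single explicit instantiation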
+extern template class HashJoinMethods; +extern template class HashJoinMethods; +extern template class HashJoinMethods; +extern template class HashJoinMethods; +extern template class HashJoinMethods; +extern template class HashJoinMethods; + +extern template class HashJoinMethods; +extern template class HashJoinMethods; +extern template class HashJoinMethods; +extern template class HashJoinMethods; +extern template class HashJoinMethods; +extern template class HashJoinMethods; + +extern template class HashJoinMethods; +extern template class HashJoinMethods; +extern template class HashJoinMethods; +extern template class HashJoinMethods; +extern template class HashJoinMethods; +extern template class HashJoinMethods; + +extern template class HashJoinMethods; +extern template class HashJoinMethods; +extern template class HashJoinMethods; +extern template class HashJoinMethods; +extern template class HashJoinMethods; +extern template class HashJoinMethods; +} + diff --git a/src/Interpreters/HashJoin/InnerHashJoin.cpp b/src/Interpreters/HashJoin/InnerHashJoin.cpp new file mode 100644 index 00000000000..85aedf3a8e5 --- /dev/null +++ b/src/Interpreters/HashJoin/InnerHashJoin.cpp @@ -0,0 +1,12 @@ + +#include + +namespace DB +{ +template class HashJoinMethods; +template class HashJoinMethods; +template class HashJoinMethods; +template class HashJoinMethods; +template class HashJoinMethods; +template class HashJoinMethods; +} diff --git a/src/Interpreters/HashJoin/JoinFeatures.h b/src/Interpreters/HashJoin/JoinFeatures.h new file mode 100644 index 00000000000..2f2bd1e29a2 --- /dev/null +++ b/src/Interpreters/HashJoin/JoinFeatures.h @@ -0,0 +1,28 @@ +#pragma once +#include +#include +namespace DB +{ +template +struct JoinFeatures +{ + static constexpr bool is_any_join = STRICTNESS == JoinStrictness::Any; + static constexpr bool is_any_or_semi_join = STRICTNESS == JoinStrictness::Any || STRICTNESS == JoinStrictness::RightAny || (STRICTNESS == JoinStrictness::Semi && KIND == JoinKind::Left); + static constexpr bool is_all_join = STRICTNESS == JoinStrictness::All; + static constexpr bool is_asof_join = STRICTNESS == JoinStrictness::Asof; + static constexpr bool is_semi_join = STRICTNESS == JoinStrictness::Semi; + static constexpr bool is_anti_join = STRICTNESS == JoinStrictness::Anti; + + static constexpr bool left = KIND == JoinKind::Left; + static constexpr bool right = KIND == JoinKind::Right; + static constexpr bool inner = KIND == JoinKind::Inner; + static constexpr bool full = KIND == JoinKind::Full; + + static constexpr bool need_replication = is_all_join || (is_any_join && right) || (is_semi_join && right); + static constexpr bool need_filter = !need_replication && (inner || right || (is_semi_join && left) || (is_anti_join && left)); + static constexpr bool add_missing = (left || full) && !is_semi_join; + + static constexpr bool need_flags = MapGetter::flagged; +}; + +} diff --git a/src/Interpreters/HashJoin/JoinUsedFlags.h b/src/Interpreters/HashJoin/JoinUsedFlags.h new file mode 100644 index 00000000000..bd41ba2073f --- /dev/null +++ b/src/Interpreters/HashJoin/JoinUsedFlags.h @@ -0,0 +1,154 @@ +#pragma once +#include +#include +#include +#include +#include +#include + +namespace DB +{ +namespace JoinStuff +{ +/// Flags needed to implement RIGHT and FULL JOINs. 
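+/// Usage sketch (simplified; emitNonJoinedRow is a hypothetical helper, named here only for
+/// illustration): during the probe phase every matched right-side row is flagged, and a
+/// post-join pass emits the right rows whose flag is still unset, padded with defaults:
+///
+///     for (size_t row = 0; row < block_ptr->rows(); ++row)
+///         if (!used_flags.getUsedSafe(block_ptr, row))
+///             emitNonJoinedRow(block_ptr, row);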
+class JoinUsedFlags
+{
+    using RawBlockPtr = const Block *;
+    using UsedFlagsForBlock = std::vector<std::atomic_bool>;
+
+    /// For multiple disjuncts, each entry in the hash map stores the flags for a particular block.
+    /// For a single disjunct we store all flags in the `nullptr` entry; the index is the offset in FindResult.
+    std::unordered_map<RawBlockPtr, UsedFlagsForBlock> flags;
+
+    bool need_flags;
+
+public:
+    /// Update the size of the vector with flags.
+    /// Calling this method invalidates existing flags.
+    /// It can be called several times, but all calls should happen before using this structure.
+    template <JoinKind KIND, JoinStrictness STRICTNESS>
+    void reinit(size_t size)
+    {
+        if constexpr (MapGetter<KIND, STRICTNESS>::flagged)
+        {
+            assert(flags[nullptr].size() <= size);
+            need_flags = true;
+            // For the single disjunct case we don't need to reinit each time addBlockToJoin is called,
+            // and no value is inserted into this JoinUsedFlags before addBlockToJoin finishes.
+            // So we reinit only when the hash table is rehashed to a larger size.
+            if (flags.empty() || flags[nullptr].size() < size) [[unlikely]]
+            {
+                flags[nullptr] = std::vector<std::atomic_bool>(size);
+            }
+        }
+    }
+
+    template <JoinKind KIND, JoinStrictness STRICTNESS>
+    void reinit(const Block * block_ptr)
+    {
+        if constexpr (MapGetter<KIND, STRICTNESS>::flagged)
+        {
+            assert(flags[block_ptr].size() <= block_ptr->rows());
+            need_flags = true;
+            flags[block_ptr] = std::vector<std::atomic_bool>(block_ptr->rows());
+        }
+    }
+
+    bool getUsedSafe(size_t i) const
+    {
+        return getUsedSafe(nullptr, i);
+    }
+    bool getUsedSafe(const Block * block_ptr, size_t row_idx) const
+    {
+        if (auto it = flags.find(block_ptr); it != flags.end())
+            return it->second[row_idx].load();
+        return !need_flags;
+    }
+
+    template <bool use_flags, bool flag_per_row, typename FindResult>
+    void setUsed(const FindResult & f)
+    {
+        if constexpr (!use_flags)
+            return;
+
+        /// Could be set simultaneously from different threads.
+        if constexpr (flag_per_row)
+        {
+            auto & mapped = f.getMapped();
+            flags[mapped.block][mapped.row_num].store(true, std::memory_order_relaxed);
+        }
+        else
+        {
+            flags[nullptr][f.getOffset()].store(true, std::memory_order_relaxed);
+        }
+    }
+
+    template <bool use_flags, bool flag_per_row>
+    void setUsed(const Block * block, size_t row_num, size_t offset)
+    {
+        if constexpr (!use_flags)
+            return;
+
+        /// Could be set simultaneously from different threads.
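+        /// Relaxed ordering is enough here, assuming the flags are consumed the way the join
+        /// pipeline does it: each flag only ever transitions false -> true, and readers inspect
+        /// the flags after the probe phase has completed (past a synchronization point), so no
+        /// ordering between individual flag stores needs to be enforced.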
+ if constexpr (flag_per_row) + { + flags[block][row_num].store(true, std::memory_order_relaxed); + } + else + { + flags[nullptr][offset].store(true, std::memory_order_relaxed); + } + } + + template + bool getUsed(const FindResult & f) + { + if constexpr (!use_flags) + return true; + + if constexpr (flag_per_row) + { + auto & mapped = f.getMapped(); + return flags[mapped.block][mapped.row_num].load(); + } + else + { + return flags[nullptr][f.getOffset()].load(); + } + + } + + template + bool setUsedOnce(const FindResult & f) + { + if constexpr (!use_flags) + return true; + + if constexpr (flag_per_row) + { + auto & mapped = f.getMapped(); + + /// fast check to prevent heavy CAS with seq_cst order + if (flags[mapped.block][mapped.row_num].load(std::memory_order_relaxed)) + return false; + + bool expected = false; + return flags[mapped.block][mapped.row_num].compare_exchange_strong(expected, true); + } + else + { + auto off = f.getOffset(); + + /// fast check to prevent heavy CAS with seq_cst order + if (flags[nullptr][off].load(std::memory_order_relaxed)) + return false; + + bool expected = false; + return flags[nullptr][off].compare_exchange_strong(expected, true); + } + + } +}; + +} +} diff --git a/src/Interpreters/HashJoin/KeyGetter.h b/src/Interpreters/HashJoin/KeyGetter.h new file mode 100644 index 00000000000..35ff2bb6eb5 --- /dev/null +++ b/src/Interpreters/HashJoin/KeyGetter.h @@ -0,0 +1,73 @@ +#pragma once +#include + + +namespace DB +{ +template +class KeyGetterEmpty +{ +public: + struct MappedType + { + using mapped_type = Mapped; + }; + + using FindResult = ColumnsHashing::columns_hashing_impl::FindResultImpl; + + KeyGetterEmpty() = default; + + FindResult findKey(MappedType, size_t, const Arena &) { return FindResult(); } +}; + +template +struct KeyGetterForTypeImpl; + +constexpr bool use_offset = true; + +template struct KeyGetterForTypeImpl +{ + using Type = ColumnsHashing::HashMethodOneNumber; +}; +template struct KeyGetterForTypeImpl +{ + using Type = ColumnsHashing::HashMethodOneNumber; +}; +template struct KeyGetterForTypeImpl +{ + using Type = ColumnsHashing::HashMethodOneNumber; +}; +template struct KeyGetterForTypeImpl +{ + using Type = ColumnsHashing::HashMethodOneNumber; +}; +template struct KeyGetterForTypeImpl +{ + using Type = ColumnsHashing::HashMethodString; +}; +template struct KeyGetterForTypeImpl +{ + using Type = ColumnsHashing::HashMethodFixedString; +}; +template struct KeyGetterForTypeImpl +{ + using Type = ColumnsHashing::HashMethodKeysFixed; +}; +template struct KeyGetterForTypeImpl +{ + using Type = ColumnsHashing::HashMethodKeysFixed; +}; +template struct KeyGetterForTypeImpl +{ + using Type = ColumnsHashing::HashMethodHashed; +}; + +template +struct KeyGetterForType +{ + using Value = typename Data::value_type; + using Mapped_t = typename Data::mapped_type; + using Mapped = std::conditional_t, const Mapped_t, Mapped_t>; + using Type = typename KeyGetterForTypeImpl::Type; +}; +} diff --git a/src/Interpreters/HashJoin/KnowRowsHolder.h b/src/Interpreters/HashJoin/KnowRowsHolder.h new file mode 100644 index 00000000000..d51c96893c5 --- /dev/null +++ b/src/Interpreters/HashJoin/KnowRowsHolder.h @@ -0,0 +1,148 @@ +#pragma once +#include +#include +#include +#include +namespace DB +{ + +template +class KnownRowsHolder; + +/// Keep already joined rows to prevent duplication if many disjuncts +/// if for a particular pair of rows condition looks like TRUE or TRUE or TRUE +/// we want to have it once in resultset +template<> +class KnownRowsHolder +{ +public: 
+ using Type = std::pair; + +private: + static const size_t MAX_LINEAR = 16; // threshold to switch from Array to Set + using ArrayHolder = std::array; + using SetHolder = std::set; + using SetHolderPtr = std::unique_ptr; + + ArrayHolder array_holder; + SetHolderPtr set_holder_ptr; + + size_t items; + +public: + KnownRowsHolder() + : items(0) + { + } + + + template + void add(InputIt from, InputIt to) + { + const size_t new_items = std::distance(from, to); + if (items + new_items <= MAX_LINEAR) + { + std::copy(from, to, &array_holder[items]); + } + else + { + if (items <= MAX_LINEAR) + { + set_holder_ptr = std::make_unique(); + set_holder_ptr->insert(std::cbegin(array_holder), std::cbegin(array_holder) + items); + } + set_holder_ptr->insert(from, to); + } + items += new_items; + } + + template + bool isKnown(const Needle & needle) + { + return items <= MAX_LINEAR + ? std::find(std::cbegin(array_holder), std::cbegin(array_holder) + items, needle) != std::cbegin(array_holder) + items + : set_holder_ptr->find(needle) != set_holder_ptr->end(); + } +}; + +template<> +class KnownRowsHolder +{ +public: + template + void add(InputIt, InputIt) + { + } + + template + static bool isKnown(const Needle &) + { + return false; + } +}; + +template +using FindResultImpl = ColumnsHashing::columns_hashing_impl::FindResultImpl; + + +template +void addFoundRowAll( + const typename Map::mapped_type & mapped, + AddedColumns & added, + IColumn::Offset & current_offset, + KnownRowsHolder & known_rows [[maybe_unused]], + JoinStuff::JoinUsedFlags * used_flags [[maybe_unused]]) +{ + if constexpr (add_missing) + added.applyLazyDefaults(); + + if constexpr (flag_per_row) + { + std::unique_ptr::Type>> new_known_rows_ptr; + + for (auto it = mapped.begin(); it.ok(); ++it) + { + if (!known_rows.isKnown(std::make_pair(it->block, it->row_num))) + { + added.appendFromBlock(*it->block, it->row_num, false); + ++current_offset; + if (!new_known_rows_ptr) + { + new_known_rows_ptr = std::make_unique::Type>>(); + } + new_known_rows_ptr->push_back(std::make_pair(it->block, it->row_num)); + if (used_flags) + { + used_flags->JoinStuff::JoinUsedFlags::setUsedOnce( + FindResultImpl(*it, true, 0)); + } + } + } + + if (new_known_rows_ptr) + { + known_rows.add(std::cbegin(*new_known_rows_ptr), std::cend(*new_known_rows_ptr)); + } + } + else + { + for (auto it = mapped.begin(); it.ok(); ++it) + { + added.appendFromBlock(*it->block, it->row_num, false); + ++current_offset; + } + } +} + +template +void addNotFoundRow(AddedColumns & added [[maybe_unused]], IColumn::Offset & current_offset [[maybe_unused]]) +{ + if constexpr (add_missing) + { + added.appendDefaultRow(); + if constexpr (need_offset) + ++current_offset; + } +} + +} diff --git a/src/Interpreters/HashJoin/LeftHashJoin.cpp b/src/Interpreters/HashJoin/LeftHashJoin.cpp new file mode 100644 index 00000000000..69e17ff70bd --- /dev/null +++ b/src/Interpreters/HashJoin/LeftHashJoin.cpp @@ -0,0 +1,11 @@ +#include + +namespace DB +{ +template class HashJoinMethods; +template class HashJoinMethods; +template class HashJoinMethods; +template class HashJoinMethods; +template class HashJoinMethods; +template class HashJoinMethods; +} diff --git a/src/Interpreters/HashJoin/RightHashJoin.cpp b/src/Interpreters/HashJoin/RightHashJoin.cpp new file mode 100644 index 00000000000..8e304754f5c --- /dev/null +++ b/src/Interpreters/HashJoin/RightHashJoin.cpp @@ -0,0 +1,11 @@ +#include + +namespace DB +{ +template class HashJoinMethods; +template class HashJoinMethods; +template class HashJoinMethods; 
+template class HashJoinMethods; +template class HashJoinMethods; +template class HashJoinMethods; +} diff --git a/src/Interpreters/JoinSwitcher.cpp b/src/Interpreters/JoinSwitcher.cpp index 5ea347549c1..7f75b0d74b3 100644 --- a/src/Interpreters/JoinSwitcher.cpp +++ b/src/Interpreters/JoinSwitcher.cpp @@ -1,6 +1,6 @@ #include #include -#include +#include #include #include diff --git a/src/Interpreters/joinDispatch.h b/src/Interpreters/joinDispatch.h index dccbe68fdb6..54c5c7dc83a 100644 --- a/src/Interpreters/joinDispatch.h +++ b/src/Interpreters/joinDispatch.h @@ -3,7 +3,7 @@ #include #include -#include +#include /** Used in implementation of Join to process different data structures. diff --git a/src/Planner/PlannerJoinTree.cpp b/src/Planner/PlannerJoinTree.cpp index 6ec460b0894..f5a7d64b885 100644 --- a/src/Planner/PlannerJoinTree.cpp +++ b/src/Planner/PlannerJoinTree.cpp @@ -53,7 +53,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Planner/PlannerJoins.cpp b/src/Planner/PlannerJoins.cpp index c410b04f209..62527e4a0a8 100644 --- a/src/Planner/PlannerJoins.cpp +++ b/src/Planner/PlannerJoins.cpp @@ -29,7 +29,7 @@ #include #include -#include +#include #include #include #include diff --git a/src/Storages/StorageJoin.cpp b/src/Storages/StorageJoin.cpp index d12e5b1a20b..f27a76dc0dd 100644 --- a/src/Storages/StorageJoin.cpp +++ b/src/Storages/StorageJoin.cpp @@ -2,7 +2,7 @@ #include #include #include -#include +#include #include #include #include From 5aa99f88fe6de86faad863b1f579ae2a47272174 Mon Sep 17 00:00:00 2001 From: lgbo-ustc Date: Thu, 6 Jun 2024 10:40:25 +0800 Subject: [PATCH 033/140] fix style --- src/Interpreters/HashJoin.cpp | 2877 ------------------- src/Interpreters/HashJoin/HashJoin.cpp | 11 +- src/Interpreters/HashJoin/HashJoinMethods.h | 76 +- 3 files changed, 45 insertions(+), 2919 deletions(-) delete mode 100644 src/Interpreters/HashJoin.cpp diff --git a/src/Interpreters/HashJoin.cpp b/src/Interpreters/HashJoin.cpp deleted file mode 100644 index 1c2a054b2a5..00000000000 --- a/src/Interpreters/HashJoin.cpp +++ /dev/null @@ -1,2877 +0,0 @@ -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - - -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -#include - -#include -#include -#include -#include -#include -#include "Core/Joins.h" -#include "Interpreters/TemporaryDataOnDisk.h" - -#include -#include - -namespace CurrentMetrics -{ - extern const Metric TemporaryFilesForJoin; -} - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int NOT_IMPLEMENTED; - extern const int NO_SUCH_COLUMN_IN_TABLE; - extern const int INCOMPATIBLE_TYPE_OF_JOIN; - extern const int UNSUPPORTED_JOIN_KEYS; - extern const int LOGICAL_ERROR; - extern const int SYNTAX_ERROR; - extern const int SET_SIZE_LIMIT_EXCEEDED; - extern const int TYPE_MISMATCH; - extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; - extern const int INVALID_JOIN_ON_EXPRESSION; -} - -namespace -{ - -struct NotProcessedCrossJoin : public ExtraBlock -{ - size_t left_position; - size_t right_block; - std::unique_ptr reader; -}; - - -Int64 getCurrentQueryMemoryUsage() -{ - /// Use query-level memory tracker - if (auto * memory_tracker_child = CurrentThread::getMemoryTracker()) - if (auto * memory_tracker = memory_tracker_child->getParent()) - return memory_tracker->get(); - return 0; -} - -} - -namespace JoinStuff -{ - /// for single 
disjunct - bool JoinUsedFlags::getUsedSafe(size_t i) const - { - return getUsedSafe(nullptr, i); - } - - /// for multiple disjuncts - bool JoinUsedFlags::getUsedSafe(const Block * block_ptr, size_t row_idx) const - { - if (auto it = flags.find(block_ptr); it != flags.end()) - return it->second[row_idx].load(); - return !need_flags; - } - - /// for single disjunct - template - void JoinUsedFlags::reinit(size_t size) - { - if constexpr (MapGetter::flagged) - { - assert(flags[nullptr].size() <= size); - need_flags = true; - // For one disjunct clause case, we don't need to reinit each time we call addBlockToJoin. - // and there is no value inserted in this JoinUsedFlags before addBlockToJoin finish. - // So we reinit only when the hash table is rehashed to a larger size. - if (flags.empty() || flags[nullptr].size() < size) [[unlikely]] - { - flags[nullptr] = std::vector(size); - } - } - } - - /// for multiple disjuncts - template - void JoinUsedFlags::reinit(const Block * block_ptr) - { - if constexpr (MapGetter::flagged) - { - assert(flags[block_ptr].size() <= block_ptr->rows()); - need_flags = true; - flags[block_ptr] = std::vector(block_ptr->rows()); - } - } - - template - void JoinUsedFlags::setUsed(const FindResult & f) - { - if constexpr (!use_flags) - return; - - /// Could be set simultaneously from different threads. - if constexpr (flag_per_row) - { - auto & mapped = f.getMapped(); - flags[mapped.block][mapped.row_num].store(true, std::memory_order_relaxed); - } - else - { - flags[nullptr][f.getOffset()].store(true, std::memory_order_relaxed); - } - } - - template - void JoinUsedFlags::setUsed(const Block * block, size_t row_num, size_t offset) - { - if constexpr (!use_flags) - return; - - /// Could be set simultaneously from different threads. - if constexpr (flag_per_row) - { - flags[block][row_num].store(true, std::memory_order_relaxed); - } - else - { - flags[nullptr][offset].store(true, std::memory_order_relaxed); - } - } - - template - bool JoinUsedFlags::getUsed(const FindResult & f) - { - if constexpr (!use_flags) - return true; - - if constexpr (flag_per_row) - { - auto & mapped = f.getMapped(); - return flags[mapped.block][mapped.row_num].load(); - } - else - { - return flags[nullptr][f.getOffset()].load(); - } - } - - template - bool JoinUsedFlags::setUsedOnce(const FindResult & f) - { - if constexpr (!use_flags) - return true; - - if constexpr (flag_per_row) - { - auto & mapped = f.getMapped(); - - /// fast check to prevent heavy CAS with seq_cst order - if (flags[mapped.block][mapped.row_num].load(std::memory_order_relaxed)) - return false; - - bool expected = false; - return flags[mapped.block][mapped.row_num].compare_exchange_strong(expected, true); - } - else - { - auto off = f.getOffset(); - - /// fast check to prevent heavy CAS with seq_cst order - if (flags[nullptr][off].load(std::memory_order_relaxed)) - return false; - - bool expected = false; - return flags[nullptr][off].compare_exchange_strong(expected, true); - } - } -} - -static void correctNullabilityInplace(ColumnWithTypeAndName & column, bool nullable) -{ - if (nullable) - { - JoinCommon::convertColumnToNullable(column); - } - else - { - /// We have to replace values masked by NULLs with defaults. 
- if (column.column) - if (const auto * nullable_column = checkAndGetColumn(&*column.column)) - column.column = JoinCommon::filterWithBlanks(column.column, nullable_column->getNullMapColumn().getData(), true); - - JoinCommon::removeColumnNullability(column); - } -} - -static void correctNullabilityInplace(ColumnWithTypeAndName & column, bool nullable, const IColumn::Filter & negative_null_map) -{ - if (nullable) - { - JoinCommon::convertColumnToNullable(column); - if (column.type->isNullable() && !negative_null_map.empty()) - { - MutableColumnPtr mutable_column = IColumn::mutate(std::move(column.column)); - assert_cast(*mutable_column).applyNegatedNullMap(negative_null_map); - column.column = std::move(mutable_column); - } - } - else - JoinCommon::removeColumnNullability(column); -} - -HashJoin::HashJoin(std::shared_ptr table_join_, const Block & right_sample_block_, - bool any_take_last_row_, size_t reserve_num_, const String & instance_id_) - : table_join(table_join_) - , kind(table_join->kind()) - , strictness(table_join->strictness()) - , any_take_last_row(any_take_last_row_) - , reserve_num(reserve_num_) - , instance_id(instance_id_) - , asof_inequality(table_join->getAsofInequality()) - , data(std::make_shared()) - , tmp_data( - table_join_->getTempDataOnDisk() - ? std::make_unique(table_join_->getTempDataOnDisk(), CurrentMetrics::TemporaryFilesForJoin) - : nullptr) - , right_sample_block(right_sample_block_) - , max_joined_block_rows(table_join->maxJoinedBlockRows()) - , instance_log_id(!instance_id_.empty() ? "(" + instance_id_ + ") " : "") - , log(getLogger("HashJoin")) -{ - LOG_TRACE(log, "{}Keys: {}, datatype: {}, kind: {}, strictness: {}, right header: {}", - instance_log_id, TableJoin::formatClauses(table_join->getClauses(), true), data->type, kind, strictness, right_sample_block.dumpStructure()); - - validateAdditionalFilterExpression(table_join->getMixedJoinExpression()); - - if (isCrossOrComma(kind)) - { - data->type = Type::CROSS; - sample_block_with_columns_to_add = right_sample_block; - } - else if (table_join->getClauses().empty()) - { - data->type = Type::EMPTY; - /// We might need to insert default values into the right columns, materialize them - sample_block_with_columns_to_add = materializeBlock(right_sample_block); - } - else if (table_join->oneDisjunct()) - { - const auto & key_names_right = table_join->getOnlyClause().key_names_right; - JoinCommon::splitAdditionalColumns(key_names_right, right_sample_block, right_table_keys, sample_block_with_columns_to_add); - required_right_keys = table_join->getRequiredRightKeys(right_table_keys, required_right_keys_sources); - } - else - { - /// required right keys concept does not work well if multiple disjuncts, we need all keys - sample_block_with_columns_to_add = right_table_keys = materializeBlock(right_sample_block); - } - - materializeBlockInplace(right_table_keys); - initRightBlockStructure(data->sample_block); - data->sample_block = prepareRightBlock(data->sample_block); - - JoinCommon::createMissedColumns(sample_block_with_columns_to_add); - - size_t disjuncts_num = table_join->getClauses().size(); - data->maps.resize(disjuncts_num); - key_sizes.reserve(disjuncts_num); - - for (const auto & clause : table_join->getClauses()) - { - const auto & key_names_right = clause.key_names_right; - ColumnRawPtrs key_columns = JoinCommon::extractKeysForJoin(right_table_keys, key_names_right); - - if (strictness == JoinStrictness::Asof) - { - assert(disjuncts_num == 1); - - /// @note ASOF JOIN is not INNER. 
It's better avoid use of 'INNER ASOF' combination in messages. - /// In fact INNER means 'LEFT SEMI ASOF' while LEFT means 'LEFT OUTER ASOF'. - if (!isLeft(kind) && !isInner(kind)) - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Wrong ASOF JOIN type. Only ASOF and LEFT ASOF joins are supported"); - - if (key_columns.size() <= 1) - throw Exception(ErrorCodes::SYNTAX_ERROR, "ASOF join needs at least one equi-join column"); - - size_t asof_size; - asof_type = SortedLookupVectorBase::getTypeSize(*key_columns.back(), asof_size); - key_columns.pop_back(); - - /// this is going to set up the appropriate hash table for the direct lookup part of the join - /// However, this does not depend on the size of the asof join key (as that goes into the BST) - /// Therefore, add it back in such that it can be extracted appropriately from the full stored - /// key_columns and key_sizes - auto & asof_key_sizes = key_sizes.emplace_back(); - data->type = chooseMethod(kind, key_columns, asof_key_sizes); - asof_key_sizes.push_back(asof_size); - } - else - { - /// Choose data structure to use for JOIN. - auto current_join_method = chooseMethod(kind, key_columns, key_sizes.emplace_back()); - if (data->type == Type::EMPTY) - data->type = current_join_method; - else if (data->type != current_join_method) - data->type = Type::hashed; - } - } - - for (auto & maps : data->maps) - dataMapInit(maps); -} - -HashJoin::Type HashJoin::chooseMethod(JoinKind kind, const ColumnRawPtrs & key_columns, Sizes & key_sizes) -{ - size_t keys_size = key_columns.size(); - - if (keys_size == 0) - { - if (isCrossOrComma(kind)) - return Type::CROSS; - return Type::EMPTY; - } - - bool all_fixed = true; - size_t keys_bytes = 0; - key_sizes.resize(keys_size); - for (size_t j = 0; j < keys_size; ++j) - { - if (!key_columns[j]->isFixedAndContiguous()) - { - all_fixed = false; - break; - } - key_sizes[j] = key_columns[j]->sizeOfValueIfFixed(); - keys_bytes += key_sizes[j]; - } - - /// If there is one numeric key that fits in 64 bits - if (keys_size == 1 && key_columns[0]->isNumeric()) - { - size_t size_of_field = key_columns[0]->sizeOfValueIfFixed(); - if (size_of_field == 1) - return Type::key8; - if (size_of_field == 2) - return Type::key16; - if (size_of_field == 4) - return Type::key32; - if (size_of_field == 8) - return Type::key64; - if (size_of_field == 16) - return Type::keys128; - if (size_of_field == 32) - return Type::keys256; - throw Exception(ErrorCodes::LOGICAL_ERROR, "Numeric column has sizeOfField not in 1, 2, 4, 8, 16, 32."); - } - - /// If the keys fit in N bits, we will use a hash table for N-bit-packed keys - if (all_fixed && keys_bytes <= 16) - return Type::keys128; - if (all_fixed && keys_bytes <= 32) - return Type::keys256; - - /// If there is single string key, use hash table of it's values. - if (keys_size == 1) - { - auto is_string_column = [](const IColumn * column_ptr) -> bool - { - if (const auto * lc_column_ptr = typeid_cast(column_ptr)) - return typeid_cast(lc_column_ptr->getDictionary().getNestedColumn().get()); - return typeid_cast(column_ptr); - }; - - const auto * key_column = key_columns[0]; - if (is_string_column(key_column) || - (isColumnConst(*key_column) && is_string_column(assert_cast(key_column)->getDataColumnPtr().get()))) - return Type::key_string; - } - - if (keys_size == 1 && typeid_cast(key_columns[0])) - return Type::key_fixed_string; - - /// Otherwise, will use set of cryptographic hashes of unambiguously serialized values. 
- return Type::hashed; -} - -template -static KeyGetter createKeyGetter(const ColumnRawPtrs & key_columns, const Sizes & key_sizes) -{ - if constexpr (is_asof_join) - { - auto key_column_copy = key_columns; - auto key_size_copy = key_sizes; - key_column_copy.pop_back(); - key_size_copy.pop_back(); - return KeyGetter(key_column_copy, key_size_copy, nullptr); - } - else - return KeyGetter(key_columns, key_sizes, nullptr); -} - -template -using FindResultImpl = ColumnsHashing::columns_hashing_impl::FindResultImpl; - -/// Dummy key getter, always find nothing, used for JOIN ON NULL -template -class KeyGetterEmpty -{ -public: - struct MappedType - { - using mapped_type = Mapped; - }; - - using FindResult = ColumnsHashing::columns_hashing_impl::FindResultImpl; - - KeyGetterEmpty() = default; - - FindResult findKey(MappedType, size_t, const Arena &) { return FindResult(); } -}; - -template -struct KeyGetterForTypeImpl; - -constexpr bool use_offset = true; - -template struct KeyGetterForTypeImpl -{ - using Type = ColumnsHashing::HashMethodOneNumber; -}; -template struct KeyGetterForTypeImpl -{ - using Type = ColumnsHashing::HashMethodOneNumber; -}; -template struct KeyGetterForTypeImpl -{ - using Type = ColumnsHashing::HashMethodOneNumber; -}; -template struct KeyGetterForTypeImpl -{ - using Type = ColumnsHashing::HashMethodOneNumber; -}; -template struct KeyGetterForTypeImpl -{ - using Type = ColumnsHashing::HashMethodString; -}; -template struct KeyGetterForTypeImpl -{ - using Type = ColumnsHashing::HashMethodFixedString; -}; -template struct KeyGetterForTypeImpl -{ - using Type = ColumnsHashing::HashMethodKeysFixed; -}; -template struct KeyGetterForTypeImpl -{ - using Type = ColumnsHashing::HashMethodKeysFixed; -}; -template struct KeyGetterForTypeImpl -{ - using Type = ColumnsHashing::HashMethodHashed; -}; - -template -struct KeyGetterForType -{ - using Value = typename Data::value_type; - using Mapped_t = typename Data::mapped_type; - using Mapped = std::conditional_t, const Mapped_t, Mapped_t>; - using Type = typename KeyGetterForTypeImpl::Type; -}; - -void HashJoin::dataMapInit(MapsVariant & map) -{ - if (kind == JoinKind::Cross) - return; - joinDispatchInit(kind, strictness, map); - joinDispatch(kind, strictness, map, [&](auto, auto, auto & map_) { map_.create(data->type); }); - - if (reserve_num) - { - joinDispatch(kind, strictness, map, [&](auto, auto, auto & map_) { map_.reserve(data->type, reserve_num); }); - } - - if (!data) - throw Exception(ErrorCodes::LOGICAL_ERROR, "HashJoin::dataMapInit called with empty data"); -} - -bool HashJoin::empty() const -{ - return data->type == Type::EMPTY; -} - -bool HashJoin::alwaysReturnsEmptySet() const -{ - return isInnerOrRight(getKind()) && data->empty; -} - -size_t HashJoin::getTotalRowCount() const -{ - if (!data) - return 0; - - size_t res = 0; - - if (data->type == Type::CROSS) - { - for (const auto & block : data->blocks) - res += block.rows(); - } - else - { - for (const auto & map : data->maps) - { - joinDispatch(kind, strictness, map, [&](auto, auto, auto & map_) { res += map_.getTotalRowCount(data->type); }); - } - } - - return res; -} - -size_t HashJoin::getTotalByteCount() const -{ - if (!data) - return 0; - -#ifndef NDEBUG - size_t debug_blocks_allocated_size = 0; - for (const auto & block : data->blocks) - debug_blocks_allocated_size += block.allocatedBytes(); - - if (data->blocks_allocated_size != debug_blocks_allocated_size) - throw Exception(ErrorCodes::LOGICAL_ERROR, "data->blocks_allocated_size != debug_blocks_allocated_size 
({} != {})", - data->blocks_allocated_size, debug_blocks_allocated_size); - - size_t debug_blocks_nullmaps_allocated_size = 0; - for (const auto & nullmap : data->blocks_nullmaps) - debug_blocks_nullmaps_allocated_size += nullmap.second->allocatedBytes(); - - if (data->blocks_nullmaps_allocated_size != debug_blocks_nullmaps_allocated_size) - throw Exception(ErrorCodes::LOGICAL_ERROR, "data->blocks_nullmaps_allocated_size != debug_blocks_nullmaps_allocated_size ({} != {})", - data->blocks_nullmaps_allocated_size, debug_blocks_nullmaps_allocated_size); -#endif - - size_t res = 0; - - res += data->blocks_allocated_size; - res += data->blocks_nullmaps_allocated_size; - res += data->pool.allocatedBytes(); - - if (data->type != Type::CROSS) - { - for (const auto & map : data->maps) - { - joinDispatch(kind, strictness, map, [&](auto, auto, auto & map_) { res += map_.getTotalByteCountImpl(data->type); }); - } - } - return res; -} - -namespace -{ - /// Inserting an element into a hash table of the form `key -> reference to a string`, which will then be used by JOIN. - template - struct Inserter - { - static ALWAYS_INLINE bool insertOne(const HashJoin & join, Map & map, KeyGetter & key_getter, Block * stored_block, size_t i, - Arena & pool) - { - auto emplace_result = key_getter.emplaceKey(map, i, pool); - - if (emplace_result.isInserted() || join.anyTakeLastRow()) - { - new (&emplace_result.getMapped()) typename Map::mapped_type(stored_block, i); - return true; - } - return false; - } - - static ALWAYS_INLINE void insertAll(const HashJoin &, Map & map, KeyGetter & key_getter, Block * stored_block, size_t i, Arena & pool) - { - auto emplace_result = key_getter.emplaceKey(map, i, pool); - - if (emplace_result.isInserted()) - new (&emplace_result.getMapped()) typename Map::mapped_type(stored_block, i); - else - { - /// The first element of the list is stored in the value of the hash table, the rest in the pool. 
- emplace_result.getMapped().insert({stored_block, i}, pool); - } - } - - static ALWAYS_INLINE void insertAsof(HashJoin & join, Map & map, KeyGetter & key_getter, Block * stored_block, size_t i, Arena & pool, - const IColumn & asof_column) - { - auto emplace_result = key_getter.emplaceKey(map, i, pool); - typename Map::mapped_type * time_series_map = &emplace_result.getMapped(); - - TypeIndex asof_type = *join.getAsofType(); - if (emplace_result.isInserted()) - time_series_map = new (time_series_map) typename Map::mapped_type(createAsofRowRef(asof_type, join.getAsofInequality())); - (*time_series_map)->insert(asof_column, stored_block, i); - } - }; - - - template - size_t NO_INLINE insertFromBlockImplTypeCase( - HashJoin & join, Map & map, size_t rows, const ColumnRawPtrs & key_columns, - const Sizes & key_sizes, Block * stored_block, ConstNullMapPtr null_map, UInt8ColumnDataPtr join_mask, Arena & pool, bool & is_inserted) - { - [[maybe_unused]] constexpr bool mapped_one = std::is_same_v; - constexpr bool is_asof_join = STRICTNESS == JoinStrictness::Asof; - - const IColumn * asof_column [[maybe_unused]] = nullptr; - if constexpr (is_asof_join) - asof_column = key_columns.back(); - - auto key_getter = createKeyGetter(key_columns, key_sizes); - - /// For ALL and ASOF join always insert values - is_inserted = !mapped_one || is_asof_join; - - for (size_t i = 0; i < rows; ++i) - { - if (null_map && (*null_map)[i]) - { - /// nulls are not inserted into hash table, - /// keep them for RIGHT and FULL joins - is_inserted = true; - continue; - } - - /// Check condition for right table from ON section - if (join_mask && !(*join_mask)[i]) - continue; - - if constexpr (is_asof_join) - Inserter::insertAsof(join, map, key_getter, stored_block, i, pool, *asof_column); - else if constexpr (mapped_one) - is_inserted |= Inserter::insertOne(join, map, key_getter, stored_block, i, pool); - else - Inserter::insertAll(join, map, key_getter, stored_block, i, pool); - } - return map.getBufferSizeInCells(); - } - - template - size_t insertFromBlockImpl( - HashJoin & join, HashJoin::Type type, Maps & maps, size_t rows, const ColumnRawPtrs & key_columns, - const Sizes & key_sizes, Block * stored_block, ConstNullMapPtr null_map, UInt8ColumnDataPtr join_mask, Arena & pool, bool & is_inserted) - { - switch (type) - { - case HashJoin::Type::EMPTY: - [[fallthrough]]; - case HashJoin::Type::CROSS: - /// Do nothing. We will only save block, and it is enough - is_inserted = true; - return 0; - - #define M(TYPE) \ - case HashJoin::Type::TYPE: \ - return insertFromBlockImplTypeCase>::Type>(\ - join, *maps.TYPE, rows, key_columns, key_sizes, stored_block, null_map, join_mask, pool, is_inserted); \ - break; - - APPLY_FOR_JOIN_VARIANTS(M) - #undef M - } - } -} - -void HashJoin::initRightBlockStructure(Block & saved_block_sample) -{ - if (isCrossOrComma(kind)) - { - /// cross join doesn't have keys, just add all columns - saved_block_sample = sample_block_with_columns_to_add.cloneEmpty(); - return; - } - - bool multiple_disjuncts = !table_join->oneDisjunct(); - /// We could remove key columns for LEFT | INNER HashJoin but we should keep them for JoinSwitcher (if any). 
-    bool save_key_columns = table_join->isEnabledAlgorithm(JoinAlgorithm::AUTO) ||
-                            table_join->isEnabledAlgorithm(JoinAlgorithm::GRACE_HASH) ||
-                            isRightOrFull(kind) ||
-                            multiple_disjuncts ||
-                            table_join->getMixedJoinExpression();
-    if (save_key_columns)
-    {
-        saved_block_sample = right_table_keys.cloneEmpty();
-    }
-    else if (strictness == JoinStrictness::Asof)
-    {
-        /// Save ASOF key
-        saved_block_sample.insert(right_table_keys.safeGetByPosition(right_table_keys.columns() - 1));
-    }
-
-    /// Save non-key columns
-    for (auto & column : sample_block_with_columns_to_add)
-    {
-        if (auto * col = saved_block_sample.findByName(column.name))
-            *col = column;
-        else
-            saved_block_sample.insert(column);
-    }
-}
-
-Block HashJoin::prepareRightBlock(const Block & block, const Block & saved_block_sample_)
-{
-    Block structured_block;
-    for (const auto & sample_column : saved_block_sample_.getColumnsWithTypeAndName())
-    {
-        ColumnWithTypeAndName column = block.getByName(sample_column.name);
-
-        /// There's no optimization for right side const columns. Remove constness if any.
-        column.column = recursiveRemoveSparse(column.column->convertToFullColumnIfConst());
-
-        if (column.column->lowCardinality() && !sample_column.column->lowCardinality())
-        {
-            column.column = column.column->convertToFullColumnIfLowCardinality();
-            column.type = removeLowCardinality(column.type);
-        }
-
-        if (sample_column.column->isNullable())
-            JoinCommon::convertColumnToNullable(column);
-
-        structured_block.insert(std::move(column));
-    }
-
-    return structured_block;
-}
-
-Block HashJoin::prepareRightBlock(const Block & block) const
-{
-    return prepareRightBlock(block, savedBlockSample());
-}
-
-bool HashJoin::addBlockToJoin(const Block & source_block_, bool check_limits)
-{
-    if (!data)
-        throw Exception(ErrorCodes::LOGICAL_ERROR, "Join data was released");
-
-    /// RowRef::SizeT is uint32_t (not size_t) for hash table Cell memory efficiency.
-    /// It's possible to split bigger blocks and insert them by parts here, but it would be dead code.
-    if (unlikely(source_block_.rows() > std::numeric_limits<RowRef::SizeT>::max()))
-        throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Too many rows in right table block for HashJoin: {}", source_block_.rows());
-
-    /** We do not allocate memory for stored blocks inside HashJoin, only for the hash table.
-      * In the case when all the blocks are allocated before the first `addBlockToJoin` call,
-      * the baseline captured below will already be quite high, so the memory consumed
-      * by stored blocks will be underestimated.
-      */
-    if (!memory_usage_before_adding_blocks)
-        memory_usage_before_adding_blocks = getCurrentQueryMemoryUsage();
-
-    Block source_block = source_block_;
-    if (strictness == JoinStrictness::Asof)
-    {
-        chassert(kind == JoinKind::Left || kind == JoinKind::Inner);
-
-        /// Filter out rows with NULLs in the ASOF key: NULLs are not comparable, so they are not joined with anything.
-        /// Only INNER/LEFT ASOF joins are supported, so rows with NULLs are never returned from the right table.
-        /// Filter them out here so the join implementation does not have to handle them.
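/// Illustrative example: in `t1 ASOF LEFT JOIN t2 ON t1.k = t2.k AND t1.ts >= t2.ts`,
/// a right-side row with `ts = NULL` can never satisfy the inequality, so dropping it
/// up front does not change the result for the supported LEFT/INNER kinds.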
- const auto & asof_key_name = table_join->getOnlyClause().key_names_right.back(); - auto & asof_column = source_block.getByName(asof_key_name); - - if (asof_column.type->isNullable()) - { - /// filter rows with nulls in asof key - if (const auto * asof_const_column = typeid_cast(asof_column.column.get())) - { - if (asof_const_column->isNullAt(0)) - return false; - } - else - { - const auto & asof_column_nullable = assert_cast(*asof_column.column).getNullMapData(); - - NullMap negative_null_map(asof_column_nullable.size()); - for (size_t i = 0; i < asof_column_nullable.size(); ++i) - negative_null_map[i] = !asof_column_nullable[i]; - - for (auto & column : source_block) - column.column = column.column->filter(negative_null_map, -1); - } - } - } - - size_t rows = source_block.rows(); - - const auto & right_key_names = table_join->getAllNames(JoinTableSide::Right); - ColumnPtrMap all_key_columns(right_key_names.size()); - for (const auto & column_name : right_key_names) - { - const auto & column = source_block.getByName(column_name).column; - all_key_columns[column_name] = recursiveRemoveSparse(column->convertToFullColumnIfConst())->convertToFullColumnIfLowCardinality(); - } - - Block block_to_save = prepareRightBlock(source_block); - if (shrink_blocks) - block_to_save = block_to_save.shrinkToFit(); - - size_t max_bytes_in_join = table_join->sizeLimits().max_bytes; - size_t max_rows_in_join = table_join->sizeLimits().max_rows; - - if (kind == JoinKind::Cross && tmp_data - && (tmp_stream || (max_bytes_in_join && getTotalByteCount() + block_to_save.allocatedBytes() >= max_bytes_in_join) - || (max_rows_in_join && getTotalRowCount() + block_to_save.rows() >= max_rows_in_join))) - { - if (tmp_stream == nullptr) - { - tmp_stream = &tmp_data->createStream(right_sample_block); - } - tmp_stream->write(block_to_save); - return true; - } - - size_t total_rows = 0; - size_t total_bytes = 0; - { - if (storage_join_lock) - throw DB::Exception(ErrorCodes::LOGICAL_ERROR, "addBlockToJoin called when HashJoin locked to prevent updates"); - - assertBlocksHaveEqualStructure(data->sample_block, block_to_save, "joined block"); - - size_t min_bytes_to_compress = table_join->crossJoinMinBytesToCompress(); - size_t min_rows_to_compress = table_join->crossJoinMinRowsToCompress(); - - if (kind == JoinKind::Cross - && ((min_bytes_to_compress && getTotalByteCount() >= min_bytes_to_compress) - || (min_rows_to_compress && getTotalRowCount() >= min_rows_to_compress))) - { - block_to_save = block_to_save.compress(); - have_compressed = true; - } - - data->blocks_allocated_size += block_to_save.allocatedBytes(); - data->blocks.emplace_back(std::move(block_to_save)); - Block * stored_block = &data->blocks.back(); - - if (rows) - data->empty = false; - - bool flag_per_row = needUsedFlagsForPerRightTableRow(table_join); - const auto & onexprs = table_join->getClauses(); - for (size_t onexpr_idx = 0; onexpr_idx < onexprs.size(); ++onexpr_idx) - { - ColumnRawPtrs key_columns; - for (const auto & name : onexprs[onexpr_idx].key_names_right) - key_columns.push_back(all_key_columns[name].get()); - - /// We will insert to the map only keys, where all components are not NULL. 
-            ConstNullMapPtr null_map{};
-            ColumnPtr null_map_holder = extractNestedColumnsAndNullMap(key_columns, null_map);
-
-            /// If RIGHT or FULL, save blocks with nulls for NotJoinedBlocks
-            UInt8 save_nullmap = 0;
-            if (isRightOrFull(kind) && null_map)
-            {
-                /// Save rows with NULL keys
-                for (size_t i = 0; !save_nullmap && i < null_map->size(); ++i)
-                    save_nullmap |= (*null_map)[i];
-            }
-
-            auto join_mask_col = JoinCommon::getColumnAsMask(source_block, onexprs[onexpr_idx].condColumnNames().second);
-            /// Save rows that do not satisfy conditions from the ON section
-            ColumnUInt8::MutablePtr not_joined_map = nullptr;
-            if (!flag_per_row && isRightOrFull(kind) && join_mask_col.hasData())
-            {
-                const auto & join_mask = join_mask_col.getData();
-                /// Save rows that do not satisfy the condition
-                not_joined_map = ColumnUInt8::create(rows, 0);
-                for (size_t i = 0, sz = join_mask->size(); i < sz; ++i)
-                {
-                    /// Condition holds; do not save the row
-                    if ((*join_mask)[i])
-                        continue;
-
-                    /// Rows with NULL keys are saved via save_nullmap above; do not save them twice
-                    if (save_nullmap && (*null_map)[i])
-                        continue;
-
-                    not_joined_map->getData()[i] = 1;
-                }
-            }
-
-            bool is_inserted = false;
-            if (kind != JoinKind::Cross)
-            {
-                joinDispatch(kind, strictness, data->maps[onexpr_idx], [&](auto kind_, auto strictness_, auto & map)
-                {
-                    size_t size = insertFromBlockImpl<strictness_>(
-                        *this, data->type, map, rows, key_columns, key_sizes[onexpr_idx], stored_block, null_map,
-                        /// If the mask is a false constant, rows are added to the hash map anyway. It is not a happy path, so this case is not optimized
-                        join_mask_col.getData(),
-                        data->pool, is_inserted);
-
-                    if (flag_per_row)
-                        used_flags.reinit<kind_, strictness_>(stored_block);
-                    else if (is_inserted)
-                        /// Number of buckets + 1 for the value from zero storage
-                        used_flags.reinit<kind_, strictness_>(size + 1);
-                });
-            }
-
-            if (!flag_per_row && save_nullmap && is_inserted)
-            {
-                data->blocks_nullmaps_allocated_size += null_map_holder->allocatedBytes();
-                data->blocks_nullmaps.emplace_back(stored_block, null_map_holder);
-            }
-
-            if (!flag_per_row && not_joined_map && is_inserted)
-            {
-                data->blocks_nullmaps_allocated_size += not_joined_map->allocatedBytes();
-                data->blocks_nullmaps.emplace_back(stored_block, std::move(not_joined_map));
-            }
-
-            if (!flag_per_row && !is_inserted)
-            {
-                LOG_TRACE(log, "Skipping inserting block with {} rows", rows);
-                data->blocks_allocated_size -= stored_block->allocatedBytes();
-                data->blocks.pop_back();
-            }
-
-            if (!check_limits)
-                return true;
-
-            /// TODO: Do not calculate them every time
-            total_rows = getTotalRowCount();
-            total_bytes = getTotalByteCount();
-        }
-    }
-
-    shrinkStoredBlocksToFit(total_bytes);
-
-    return table_join->sizeLimits().check(total_rows, total_bytes, "JOIN", ErrorCodes::SET_SIZE_LIMIT_EXCEEDED);
-}
-
-void HashJoin::shrinkStoredBlocksToFit(size_t & total_bytes_in_join)
-{
-    if (shrink_blocks)
-        return; /// Already shrunk
-
-    Int64 current_memory_usage = getCurrentQueryMemoryUsage();
-    Int64 query_memory_usage_delta = current_memory_usage - memory_usage_before_adding_blocks;
-    Int64 max_total_bytes_for_query = memory_usage_before_adding_blocks ? table_join->getMaxMemoryUsage() : 0;
-
-    auto max_total_bytes_in_join = table_join->sizeLimits().max_bytes;
-
-    /** If the accounted data size is more than half of `max_bytes_in_join`,
-      * or the growth of query memory consumption since blocks were first added
-      * (an estimate of the memory consumed by the join, via the memory tracker)
-      * is bigger than half of all memory available to the query,
-      * then shrink stored blocks to fit.
- */ - shrink_blocks = (max_total_bytes_in_join && total_bytes_in_join > max_total_bytes_in_join / 2) || - (max_total_bytes_for_query && query_memory_usage_delta > max_total_bytes_for_query / 2); - if (!shrink_blocks) - return; - - LOG_DEBUG(log, "Shrinking stored blocks, memory consumption is {} {} calculated by join, {} {} by memory tracker", - ReadableSize(total_bytes_in_join), max_total_bytes_in_join ? fmt::format("/ {}", ReadableSize(max_total_bytes_in_join)) : "", - ReadableSize(query_memory_usage_delta), max_total_bytes_for_query ? fmt::format("/ {}", ReadableSize(max_total_bytes_for_query)) : ""); - - for (auto & stored_block : data->blocks) - { - size_t old_size = stored_block.allocatedBytes(); - stored_block = stored_block.shrinkToFit(); - size_t new_size = stored_block.allocatedBytes(); - - if (old_size >= new_size) - { - if (data->blocks_allocated_size < old_size - new_size) - throw Exception(ErrorCodes::LOGICAL_ERROR, - "Blocks allocated size value is broken: " - "blocks_allocated_size = {}, old_size = {}, new_size = {}", - data->blocks_allocated_size, old_size, new_size); - - data->blocks_allocated_size -= old_size - new_size; - } - else - /// Sometimes after clone resized block can be bigger than original - data->blocks_allocated_size += new_size - old_size; - } - - auto new_total_bytes_in_join = getTotalByteCount(); - - Int64 new_current_memory_usage = getCurrentQueryMemoryUsage(); - - LOG_DEBUG(log, "Shrunk stored blocks {} freed ({} by memory tracker), new memory consumption is {} ({} by memory tracker)", - ReadableSize(total_bytes_in_join - new_total_bytes_in_join), ReadableSize(current_memory_usage - new_current_memory_usage), - ReadableSize(new_total_bytes_in_join), ReadableSize(new_current_memory_usage)); - - total_bytes_in_join = new_total_bytes_in_join; -} - - -namespace -{ - -struct JoinOnKeyColumns -{ - Names key_names; - - Columns materialized_keys_holder; - ColumnRawPtrs key_columns; - - ConstNullMapPtr null_map; - ColumnPtr null_map_holder; - - /// Only rows where mask == true can be joined - JoinCommon::JoinMask join_mask_column; - - Sizes key_sizes; - - explicit JoinOnKeyColumns(const Block & block, const Names & key_names_, const String & cond_column_name, const Sizes & key_sizes_) - : key_names(key_names_) - , materialized_keys_holder(JoinCommon::materializeColumns(block, key_names)) /// Rare case, when keys are constant or low cardinality. To avoid code bloat, simply materialize them. 
- , key_columns(JoinCommon::getRawPointers(materialized_keys_holder)) - , null_map(nullptr) - , null_map_holder(extractNestedColumnsAndNullMap(key_columns, null_map)) - , join_mask_column(JoinCommon::getColumnAsMask(block, cond_column_name)) - , key_sizes(key_sizes_) - { - } - - bool isRowFiltered(size_t i) const { return join_mask_column.isRowFiltered(i); } -}; - -template -class AddedColumns -{ -public: - struct TypeAndName - { - DataTypePtr type; - String name; - String qualified_name; - - TypeAndName(DataTypePtr type_, const String & name_, const String & qualified_name_) - : type(type_), name(name_), qualified_name(qualified_name_) - { - } - }; - - struct LazyOutput - { - PaddedPODArray blocks; - PaddedPODArray row_nums; - }; - - AddedColumns( - const Block & left_block_, - const Block & block_with_columns_to_add, - const Block & saved_block_sample, - const HashJoin & join, - std::vector && join_on_keys_, - ExpressionActionsPtr additional_filter_expression_, - bool is_asof_join, - bool is_join_get_) - : left_block(left_block_) - , join_on_keys(join_on_keys_) - , additional_filter_expression(additional_filter_expression_) - , rows_to_add(left_block.rows()) - , is_join_get(is_join_get_) - { - size_t num_columns_to_add = block_with_columns_to_add.columns(); - if (is_asof_join) - ++num_columns_to_add; - - if constexpr (lazy) - { - has_columns_to_add = num_columns_to_add > 0; - lazy_output.blocks.reserve(rows_to_add); - lazy_output.row_nums.reserve(rows_to_add); - } - - columns.reserve(num_columns_to_add); - type_name.reserve(num_columns_to_add); - right_indexes.reserve(num_columns_to_add); - - for (const auto & src_column : block_with_columns_to_add) - { - /// Column names `src_column.name` and `qualified_name` can differ for StorageJoin, - /// because it uses not qualified right block column names - auto qualified_name = join.getTableJoin().renamedRightColumnName(src_column.name); - /// Don't insert column if it's in left block - if (!left_block.has(qualified_name)) - addColumn(src_column, qualified_name); - } - - if (is_asof_join) - { - assert(join_on_keys.size() == 1); - const ColumnWithTypeAndName & right_asof_column = join.rightAsofKeyColumn(); - addColumn(right_asof_column, right_asof_column.name); - left_asof_key = join_on_keys[0].key_columns.back(); - } - - for (auto & tn : type_name) - right_indexes.push_back(saved_block_sample.getPositionByName(tn.name)); - - nullable_column_ptrs.resize(right_indexes.size(), nullptr); - for (size_t j = 0; j < right_indexes.size(); ++j) - { - /** If it's joinGetOrNull, we will have nullable columns in result block - * even if right column is not nullable in storage (saved_block_sample). 
- */ - const auto & saved_column = saved_block_sample.getByPosition(right_indexes[j]).column; - if (columns[j]->isNullable() && !saved_column->isNullable()) - nullable_column_ptrs[j] = typeid_cast(columns[j].get()); - } - } - - size_t size() const { return columns.size(); } - - void buildOutput(); - - ColumnWithTypeAndName moveColumn(size_t i) - { - return ColumnWithTypeAndName(std::move(columns[i]), type_name[i].type, type_name[i].qualified_name); - } - - void appendFromBlock(const Block & block, size_t row_num, bool has_default); - - void appendDefaultRow(); - - void applyLazyDefaults(); - - const IColumn & leftAsofKey() const { return *left_asof_key; } - - Block left_block; - std::vector join_on_keys; - ExpressionActionsPtr additional_filter_expression; - - size_t max_joined_block_rows = 0; - size_t rows_to_add; - std::unique_ptr offsets_to_replicate; - bool need_filter = false; - IColumn::Filter filter; - - void reserve(bool need_replicate) - { - if (!max_joined_block_rows) - return; - - /// Do not allow big allocations when user set max_joined_block_rows to huge value - size_t reserve_size = std::min(max_joined_block_rows, DEFAULT_BLOCK_SIZE * 2); - - if (need_replicate) - /// Reserve 10% more space for columns, because some rows can be repeated - reserve_size = static_cast(1.1 * reserve_size); - - for (auto & column : columns) - column->reserve(reserve_size); - } - -private: - - void checkBlock(const Block & block) - { - for (size_t j = 0; j < right_indexes.size(); ++j) - { - const auto * column_from_block = block.getByPosition(right_indexes[j]).column.get(); - const auto * dest_column = columns[j].get(); - if (auto * nullable_col = nullable_column_ptrs[j]) - { - if (!is_join_get) - throw Exception(ErrorCodes::LOGICAL_ERROR, - "Columns {} and {} can have different nullability only in joinGetOrNull", - dest_column->getName(), column_from_block->getName()); - dest_column = nullable_col->getNestedColumnPtr().get(); - } - /** Using dest_column->structureEquals(*column_from_block) will not work for low cardinality columns, - * because dictionaries can be different, while calling insertFrom on them is safe, for example: - * ColumnLowCardinality(size = 0, UInt8(size = 0), ColumnUnique(size = 1, String(size = 1))) - * and - * ColumnLowCardinality(size = 0, UInt16(size = 0), ColumnUnique(size = 1, String(size = 1))) - */ - if (typeid(*dest_column) != typeid(*column_from_block)) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Columns {} and {} have different types {} and {}", - dest_column->getName(), column_from_block->getName(), - demangle(typeid(*dest_column).name()), demangle(typeid(*column_from_block).name())); - } - } - - MutableColumns columns; - bool is_join_get; - std::vector right_indexes; - std::vector type_name; - std::vector nullable_column_ptrs; - size_t lazy_defaults_count = 0; - - /// for lazy - // The default row is represented by an empty RowRef, so that fixed-size blocks can be generated sequentially, - // default_count cannot represent the position of the row - LazyOutput lazy_output; - bool has_columns_to_add; - - /// for ASOF - const IColumn * left_asof_key = nullptr; - - - void addColumn(const ColumnWithTypeAndName & src_column, const std::string & qualified_name) - { - columns.push_back(src_column.column->cloneEmpty()); - columns.back()->reserve(src_column.column->size()); - type_name.emplace_back(src_column.type, src_column.name, qualified_name); - } -}; -template<> void AddedColumns::buildOutput() -{ -} - -template<> -void AddedColumns::buildOutput() -{ - for (size_t i 
= 0; i < this->size(); ++i) - { - auto& col = columns[i]; - size_t default_count = 0; - auto apply_default = [&]() - { - if (default_count > 0) - { - JoinCommon::addDefaultValues(*col, type_name[i].type, default_count); - default_count = 0; - } - }; - - for (size_t j = 0; j < lazy_output.blocks.size(); ++j) - { - if (!lazy_output.blocks[j]) - { - default_count++; - continue; - } - apply_default(); - const auto & column_from_block = reinterpret_cast(lazy_output.blocks[j])->getByPosition(right_indexes[i]); - /// If it's joinGetOrNull, we need to wrap not-nullable columns in StorageJoin. - if (is_join_get) - { - if (auto * nullable_col = typeid_cast(col.get()); - nullable_col && !column_from_block.column->isNullable()) - { - nullable_col->insertFromNotNullable(*column_from_block.column, lazy_output.row_nums[j]); - continue; - } - } - col->insertFrom(*column_from_block.column, lazy_output.row_nums[j]); - } - apply_default(); - } -} - -template<> -void AddedColumns::applyLazyDefaults() -{ - if (lazy_defaults_count) - { - for (size_t j = 0, size = right_indexes.size(); j < size; ++j) - JoinCommon::addDefaultValues(*columns[j], type_name[j].type, lazy_defaults_count); - lazy_defaults_count = 0; - } -} - -template<> -void AddedColumns::applyLazyDefaults() -{ -} - -template <> -void AddedColumns::appendFromBlock(const Block & block, size_t row_num,const bool has_defaults) -{ - if (has_defaults) - applyLazyDefaults(); - -#ifndef NDEBUG - checkBlock(block); -#endif - if (is_join_get) - { - size_t right_indexes_size = right_indexes.size(); - for (size_t j = 0; j < right_indexes_size; ++j) - { - const auto & column_from_block = block.getByPosition(right_indexes[j]); - if (auto * nullable_col = nullable_column_ptrs[j]) - nullable_col->insertFromNotNullable(*column_from_block.column, row_num); - else - columns[j]->insertFrom(*column_from_block.column, row_num); - } - } - else - { - size_t right_indexes_size = right_indexes.size(); - for (size_t j = 0; j < right_indexes_size; ++j) - { - const auto & column_from_block = block.getByPosition(right_indexes[j]); - columns[j]->insertFrom(*column_from_block.column, row_num); - } - } -} - -template <> -void AddedColumns::appendFromBlock(const Block & block, size_t row_num, bool) -{ -#ifndef NDEBUG - checkBlock(block); -#endif - if (has_columns_to_add) - { - lazy_output.blocks.emplace_back(reinterpret_cast(&block)); - lazy_output.row_nums.emplace_back(static_cast(row_num)); - } -} -template<> -void AddedColumns::appendDefaultRow() -{ - ++lazy_defaults_count; -} - -template<> -void AddedColumns::appendDefaultRow() -{ - if (has_columns_to_add) - { - lazy_output.blocks.emplace_back(0); - lazy_output.row_nums.emplace_back(0); - } -} - -template -struct JoinFeatures -{ - static constexpr bool is_any_join = STRICTNESS == JoinStrictness::Any; - static constexpr bool is_any_or_semi_join = STRICTNESS == JoinStrictness::Any || STRICTNESS == JoinStrictness::RightAny || (STRICTNESS == JoinStrictness::Semi && KIND == JoinKind::Left); - static constexpr bool is_all_join = STRICTNESS == JoinStrictness::All; - static constexpr bool is_asof_join = STRICTNESS == JoinStrictness::Asof; - static constexpr bool is_semi_join = STRICTNESS == JoinStrictness::Semi; - static constexpr bool is_anti_join = STRICTNESS == JoinStrictness::Anti; - - static constexpr bool left = KIND == JoinKind::Left; - static constexpr bool right = KIND == JoinKind::Right; - static constexpr bool inner = KIND == JoinKind::Inner; - static constexpr bool full = KIND == JoinKind::Full; - - static constexpr bool 
need_replication = is_all_join || (is_any_join && right) || (is_semi_join && right); - static constexpr bool need_filter = !need_replication && (inner || right || (is_semi_join && left) || (is_anti_join && left)); - static constexpr bool add_missing = (left || full) && !is_semi_join; - - static constexpr bool need_flags = MapGetter::flagged; -}; - -template -class KnownRowsHolder; - -/// Keep already joined rows to prevent duplication if many disjuncts -/// if for a particular pair of rows condition looks like TRUE or TRUE or TRUE -/// we want to have it once in resultset -template<> -class KnownRowsHolder -{ -public: - using Type = std::pair; - -private: - static const size_t MAX_LINEAR = 16; // threshold to switch from Array to Set - using ArrayHolder = std::array; - using SetHolder = std::set; - using SetHolderPtr = std::unique_ptr; - - ArrayHolder array_holder; - SetHolderPtr set_holder_ptr; - - size_t items; - -public: - KnownRowsHolder() - : items(0) - { - } - - - template - void add(InputIt from, InputIt to) - { - const size_t new_items = std::distance(from, to); - if (items + new_items <= MAX_LINEAR) - { - std::copy(from, to, &array_holder[items]); - } - else - { - if (items <= MAX_LINEAR) - { - set_holder_ptr = std::make_unique(); - set_holder_ptr->insert(std::cbegin(array_holder), std::cbegin(array_holder) + items); - } - set_holder_ptr->insert(from, to); - } - items += new_items; - } - - template - bool isKnown(const Needle & needle) - { - return items <= MAX_LINEAR - ? std::find(std::cbegin(array_holder), std::cbegin(array_holder) + items, needle) != std::cbegin(array_holder) + items - : set_holder_ptr->find(needle) != set_holder_ptr->end(); - } -}; - -template<> -class KnownRowsHolder -{ -public: - template - void add(InputIt, InputIt) - { - } - - template - static bool isKnown(const Needle &) - { - return false; - } -}; - -template -void addFoundRowAll( - const typename Map::mapped_type & mapped, - AddedColumns & added, - IColumn::Offset & current_offset, - KnownRowsHolder & known_rows [[maybe_unused]], - JoinStuff::JoinUsedFlags * used_flags [[maybe_unused]]) -{ - if constexpr (add_missing) - added.applyLazyDefaults(); - - if constexpr (flag_per_row) - { - std::unique_ptr::Type>> new_known_rows_ptr; - - for (auto it = mapped.begin(); it.ok(); ++it) - { - if (!known_rows.isKnown(std::make_pair(it->block, it->row_num))) - { - added.appendFromBlock(*it->block, it->row_num, false); - ++current_offset; - if (!new_known_rows_ptr) - { - new_known_rows_ptr = std::make_unique::Type>>(); - } - new_known_rows_ptr->push_back(std::make_pair(it->block, it->row_num)); - if (used_flags) - { - used_flags->JoinStuff::JoinUsedFlags::setUsedOnce( - FindResultImpl(*it, true, 0)); - } - } - } - - if (new_known_rows_ptr) - { - known_rows.add(std::cbegin(*new_known_rows_ptr), std::cend(*new_known_rows_ptr)); - } - } - else - { - for (auto it = mapped.begin(); it.ok(); ++it) - { - added.appendFromBlock(*it->block, it->row_num, false); - ++current_offset; - } - } -} - -template -void addNotFoundRow(AddedColumns & added [[maybe_unused]], IColumn::Offset & current_offset [[maybe_unused]]) -{ - if constexpr (add_missing) - { - added.appendDefaultRow(); - if constexpr (need_offset) - ++current_offset; - } -} - -template -void setUsed(IColumn::Filter & filter [[maybe_unused]], size_t pos [[maybe_unused]]) -{ - if constexpr (need_filter) - filter[pos] = 1; -} - -template -ColumnPtr buildAdditionalFilter( - size_t left_start_row, - const std::vector & selected_rows, - const std::vector & 
row_replicate_offset, - AddedColumns & added_columns) -{ - ColumnPtr result_column; - do - { - if (selected_rows.empty()) - { - result_column = ColumnUInt8::create(); - break; - } - const Block & sample_right_block = *selected_rows.begin()->block; - if (!sample_right_block || !added_columns.additional_filter_expression) - { - auto filter = ColumnUInt8::create(); - filter->insertMany(1, selected_rows.size()); - result_column = std::move(filter); - break; - } - - auto required_cols = added_columns.additional_filter_expression->getRequiredColumnsWithTypes(); - if (required_cols.empty()) - { - Block block; - added_columns.additional_filter_expression->execute(block); - result_column = block.getByPosition(0).column->cloneResized(selected_rows.size()); - break; - } - NameSet required_column_names; - for (auto & col : required_cols) - required_column_names.insert(col.name); - - Block executed_block; - size_t right_col_pos = 0; - for (const auto & col : sample_right_block.getColumnsWithTypeAndName()) - { - if (required_column_names.contains(col.name)) - { - auto new_col = col.column->cloneEmpty(); - for (const auto & selected_row : selected_rows) - { - const auto & src_col = selected_row.block->getByPosition(right_col_pos); - new_col->insertFrom(*src_col.column, selected_row.row_num); - } - executed_block.insert({std::move(new_col), col.type, col.name}); - } - right_col_pos += 1; - } - if (!executed_block) - { - result_column = ColumnUInt8::create(); - break; - } - - for (const auto & col_name : required_column_names) - { - const auto * src_col = added_columns.left_block.findByName(col_name); - if (!src_col) - continue; - auto new_col = src_col->column->cloneEmpty(); - size_t prev_left_offset = 0; - for (size_t i = 1; i < row_replicate_offset.size(); ++i) - { - const size_t & left_offset = row_replicate_offset[i]; - size_t rows = left_offset - prev_left_offset; - if (rows) - new_col->insertManyFrom(*src_col->column, left_start_row + i - 1, rows); - prev_left_offset = left_offset; - } - executed_block.insert({std::move(new_col), src_col->type, col_name}); - } - if (!executed_block) - { - throw Exception( - ErrorCodes::LOGICAL_ERROR, - "required columns: [{}], but not found any in left/right table. right table: {}, left table: {}", - required_cols.toString(), - sample_right_block.dumpNames(), - added_columns.left_block.dumpNames()); - } - - for (const auto & col : executed_block.getColumnsWithTypeAndName()) - if (!col.column || !col.type) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Illegal nullptr column in input block: {}", executed_block.dumpStructure()); - - added_columns.additional_filter_expression->execute(executed_block); - result_column = executed_block.getByPosition(0).column->convertToFullColumnIfConst(); - executed_block.clear(); - } while (false); - - result_column = result_column->convertToFullIfNeeded(); - if (result_column->isNullable()) - { - /// Convert Nullable(UInt8) to UInt8 ensuring that nulls are zeros - /// Trying to avoid copying data, since we are the only owner of the column. 
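/// (Resetting `result_column` below drops the last reference to the nested column, so
/// IColumn::mutate can take ownership and modify it in place instead of cloning;
/// this relies on the copy-on-write semantics of ColumnPtr.)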
- ColumnPtr mask_column = assert_cast(*result_column).getNullMapColumnPtr(); - - MutableColumnPtr mutable_column; - { - ColumnPtr nested_column = assert_cast(*result_column).getNestedColumnPtr(); - result_column.reset(); - mutable_column = IColumn::mutate(std::move(nested_column)); - } - - auto & column_data = assert_cast(*mutable_column).getData(); - const auto & mask_column_data = assert_cast(*mask_column).getData(); - for (size_t i = 0; i < column_data.size(); ++i) - { - if (mask_column_data[i]) - column_data[i] = 0; - } - return mutable_column; - } - return result_column; -} - -/// Adapter class to pass into addFoundRowAll -/// In joinRightColumnsWithAdditionalFilter we don't want to add rows directly into AddedColumns, -/// because they need to be filtered by additional_filter_expression. -class PreSelectedRows : public std::vector -{ -public: - void appendFromBlock(const Block & block, size_t row_num, bool /* has_default */) { this->emplace_back(&block, row_num); } -}; - -/// First to collect all matched rows refs by join keys, then filter out rows which are not true in additional filter expression. -template < - typename KeyGetter, - typename Map, - bool need_replication, - typename AddedColumns> -NO_INLINE size_t joinRightColumnsWithAddtitionalFilter( - std::vector && key_getter_vector, - const std::vector & mapv, - AddedColumns & added_columns, - JoinStuff::JoinUsedFlags & used_flags [[maybe_unused]], - bool need_filter [[maybe_unused]], - bool need_flags [[maybe_unused]], - bool add_missing [[maybe_unused]], - bool flag_per_row [[maybe_unused]]) -{ - size_t left_block_rows = added_columns.rows_to_add; - if (need_filter) - added_columns.filter = IColumn::Filter(left_block_rows, 0); - - std::unique_ptr pool; - - if constexpr (need_replication) - added_columns.offsets_to_replicate = std::make_unique(left_block_rows); - - std::vector row_replicate_offset; - row_replicate_offset.reserve(left_block_rows); - - using FindResult = typename KeyGetter::FindResult; - size_t max_joined_block_rows = added_columns.max_joined_block_rows; - size_t left_row_iter = 0; - PreSelectedRows selected_rows; - selected_rows.reserve(left_block_rows); - std::vector find_results; - find_results.reserve(left_block_rows); - bool exceeded_max_block_rows = false; - IColumn::Offset total_added_rows = 0; - IColumn::Offset current_added_rows = 0; - - auto collect_keys_matched_rows_refs = [&]() - { - pool = std::make_unique(); - find_results.clear(); - row_replicate_offset.clear(); - row_replicate_offset.push_back(0); - current_added_rows = 0; - selected_rows.clear(); - for (; left_row_iter < left_block_rows; ++left_row_iter) - { - if constexpr (need_replication) - { - if (unlikely(total_added_rows + current_added_rows >= max_joined_block_rows)) - { - break; - } - } - KnownRowsHolder all_flag_known_rows; - KnownRowsHolder single_flag_know_rows; - for (size_t join_clause_idx = 0; join_clause_idx < added_columns.join_on_keys.size(); ++join_clause_idx) - { - const auto & join_keys = added_columns.join_on_keys[join_clause_idx]; - if (join_keys.null_map && (*join_keys.null_map)[left_row_iter]) - continue; - - bool row_acceptable = !join_keys.isRowFiltered(left_row_iter); - auto find_result = row_acceptable - ? 
key_getter_vector[join_clause_idx].findKey(*(mapv[join_clause_idx]), left_row_iter, *pool) - : FindResult(); - - if (find_result.isFound()) - { - auto & mapped = find_result.getMapped(); - find_results.push_back(find_result); - if (flag_per_row) - addFoundRowAll(mapped, selected_rows, current_added_rows, all_flag_known_rows, nullptr); - else - addFoundRowAll(mapped, selected_rows, current_added_rows, single_flag_know_rows, nullptr); - } - } - row_replicate_offset.push_back(current_added_rows); - } - }; - - auto copy_final_matched_rows = [&](size_t left_start_row, ColumnPtr filter_col) - { - const PaddedPODArray & filter_flags = assert_cast(*filter_col).getData(); - - size_t prev_replicated_row = 0; - auto selected_right_row_it = selected_rows.begin(); - size_t find_result_index = 0; - for (size_t i = 1, n = row_replicate_offset.size(); i < n; ++i) - { - bool any_matched = false; - /// For all right join, flag_per_row is true, we need mark used flags for each row. - if (flag_per_row) - { - for (size_t replicated_row = prev_replicated_row; replicated_row < row_replicate_offset[i]; ++replicated_row) - { - if (filter_flags[replicated_row]) - { - any_matched = true; - added_columns.appendFromBlock(*selected_right_row_it->block, selected_right_row_it->row_num, add_missing); - total_added_rows += 1; - if (need_flags) - used_flags.template setUsed(selected_right_row_it->block, selected_right_row_it->row_num, 0); - } - ++selected_right_row_it; - } - } - else - { - for (size_t replicated_row = prev_replicated_row; replicated_row < row_replicate_offset[i]; ++replicated_row) - { - if (filter_flags[replicated_row]) - { - any_matched = true; - added_columns.appendFromBlock(*selected_right_row_it->block, selected_right_row_it->row_num, add_missing); - total_added_rows += 1; - } - ++selected_right_row_it; - } - } - if (!any_matched) - { - if (add_missing) - addNotFoundRow(added_columns, total_added_rows); - else - addNotFoundRow(added_columns, total_added_rows); - } - else - { - if (!flag_per_row && need_flags) - used_flags.template setUsed(find_results[find_result_index]); - if (need_filter) - setUsed(added_columns.filter, left_start_row + i - 1); - if (add_missing) - added_columns.applyLazyDefaults(); - } - find_result_index += (prev_replicated_row != row_replicate_offset[i]); - - if constexpr (need_replication) - { - (*added_columns.offsets_to_replicate)[left_start_row + i - 1] = total_added_rows; - } - prev_replicated_row = row_replicate_offset[i]; - } - }; - - while (left_row_iter < left_block_rows && !exceeded_max_block_rows) - { - auto left_start_row = left_row_iter; - collect_keys_matched_rows_refs(); - if (selected_rows.size() != current_added_rows || row_replicate_offset.size() != left_row_iter - left_start_row + 1) - { - throw Exception( - ErrorCodes::LOGICAL_ERROR, - "Sizes are mismatched. selected_rows.size:{}, current_added_rows:{}, row_replicate_offset.size:{}, left_row_iter: {}, " - "left_start_row: {}", - selected_rows.size(), - current_added_rows, - row_replicate_offset.size(), - left_row_iter, - left_start_row); - } - auto filter_col = buildAdditionalFilter(left_start_row, selected_rows, row_replicate_offset, added_columns); - copy_final_matched_rows(left_start_row, filter_col); - - if constexpr (need_replication) - { - // Add a check for current_added_rows to avoid run the filter expression on too small size batch. 
- if (total_added_rows >= max_joined_block_rows || current_added_rows < 1024) - { - exceeded_max_block_rows = true; - } - } - } - - if constexpr (need_replication) - { - added_columns.offsets_to_replicate->resize_assume_reserved(left_row_iter); - added_columns.filter.resize_assume_reserved(left_row_iter); - } - added_columns.applyLazyDefaults(); - return left_row_iter; -} - -/// Joins right table columns which indexes are present in right_indexes using specified map. -/// Makes filter (1 if row presented in right table) and returns offsets to replicate (for ALL JOINS). -template -NO_INLINE size_t joinRightColumns( - std::vector && key_getter_vector, - const std::vector & mapv, - AddedColumns & added_columns, - JoinStuff::JoinUsedFlags & used_flags [[maybe_unused]]) -{ - constexpr JoinFeatures join_features; - - size_t rows = added_columns.rows_to_add; - if constexpr (need_filter) - added_columns.filter = IColumn::Filter(rows, 0); - - Arena pool; - - if constexpr (join_features.need_replication) - added_columns.offsets_to_replicate = std::make_unique(rows); - - IColumn::Offset current_offset = 0; - size_t max_joined_block_rows = added_columns.max_joined_block_rows; - size_t i = 0; - for (; i < rows; ++i) - { - if constexpr (join_features.need_replication) - { - if (unlikely(current_offset >= max_joined_block_rows)) - { - added_columns.offsets_to_replicate->resize_assume_reserved(i); - added_columns.filter.resize_assume_reserved(i); - break; - } - } - - bool right_row_found = false; - - KnownRowsHolder known_rows; - for (size_t onexpr_idx = 0; onexpr_idx < added_columns.join_on_keys.size(); ++onexpr_idx) - { - const auto & join_keys = added_columns.join_on_keys[onexpr_idx]; - if (join_keys.null_map && (*join_keys.null_map)[i]) - continue; - - bool row_acceptable = !join_keys.isRowFiltered(i); - using FindResult = typename KeyGetter::FindResult; - auto find_result = row_acceptable ? key_getter_vector[onexpr_idx].findKey(*(mapv[onexpr_idx]), i, pool) : FindResult(); - - if (find_result.isFound()) - { - right_row_found = true; - auto & mapped = find_result.getMapped(); - if constexpr (join_features.is_asof_join) - { - const IColumn & left_asof_key = added_columns.leftAsofKey(); - - auto row_ref = mapped->findAsof(left_asof_key, i); - if (row_ref.block) - { - setUsed(added_columns.filter, i); - if constexpr (flag_per_row) - used_flags.template setUsed(row_ref.block, row_ref.row_num, 0); - else - used_flags.template setUsed(find_result); - - added_columns.appendFromBlock(*row_ref.block, row_ref.row_num, join_features.add_missing); - } - else - addNotFoundRow(added_columns, current_offset); - } - else if constexpr (join_features.is_all_join) - { - setUsed(added_columns.filter, i); - used_flags.template setUsed(find_result); - auto used_flags_opt = join_features.need_flags ? &used_flags : nullptr; - addFoundRowAll(mapped, added_columns, current_offset, known_rows, used_flags_opt); - } - else if constexpr ((join_features.is_any_join || join_features.is_semi_join) && join_features.right) - { - /// Use first appeared left key + it needs left columns replication - bool used_once = used_flags.template setUsedOnce(find_result); - if (used_once) - { - auto used_flags_opt = join_features.need_flags ? 
&used_flags : nullptr; - setUsed(added_columns.filter, i); - addFoundRowAll(mapped, added_columns, current_offset, known_rows, used_flags_opt); - } - } - else if constexpr (join_features.is_any_join && KIND == JoinKind::Inner) - { - bool used_once = used_flags.template setUsedOnce(find_result); - - /// Use first appeared left key only - if (used_once) - { - setUsed(added_columns.filter, i); - added_columns.appendFromBlock(*mapped.block, mapped.row_num, join_features.add_missing); - } - - break; - } - else if constexpr (join_features.is_any_join && join_features.full) - { - /// TODO - } - else if constexpr (join_features.is_anti_join) - { - if constexpr (join_features.right && join_features.need_flags) - used_flags.template setUsed(find_result); - } - else /// ANY LEFT, SEMI LEFT, old ANY (RightAny) - { - setUsed(added_columns.filter, i); - used_flags.template setUsed(find_result); - added_columns.appendFromBlock(*mapped.block, mapped.row_num, join_features.add_missing); - - if (join_features.is_any_or_semi_join) - { - break; - } - } - } - } - - if (!right_row_found) - { - if constexpr (join_features.is_anti_join && join_features.left) - setUsed(added_columns.filter, i); - addNotFoundRow(added_columns, current_offset); - } - - if constexpr (join_features.need_replication) - { - (*added_columns.offsets_to_replicate)[i] = current_offset; - } - } - - added_columns.applyLazyDefaults(); - return i; -} - -template -size_t joinRightColumnsSwitchMultipleDisjuncts( - std::vector && key_getter_vector, - const std::vector & mapv, - AddedColumns & added_columns, - JoinStuff::JoinUsedFlags & used_flags [[maybe_unused]]) -{ - constexpr JoinFeatures join_features; - if constexpr (join_features.is_all_join) - { - if (added_columns.additional_filter_expression) - { - bool mark_per_row_used = join_features.right || join_features.full || mapv.size() > 1; - return joinRightColumnsWithAddtitionalFilter( - std::forward>(key_getter_vector), - mapv, - added_columns, - used_flags, - need_filter, - join_features.need_flags, - join_features.add_missing, - mark_per_row_used); - } - } - - if (added_columns.additional_filter_expression) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Additional filter expression is not supported for this JOIN"); - - return mapv.size() > 1 - ? 
joinRightColumns(std::forward>(key_getter_vector), mapv, added_columns, used_flags) - : joinRightColumns(std::forward>(key_getter_vector), mapv, added_columns, used_flags); -} - -template -size_t joinRightColumnsSwitchNullability( - std::vector && key_getter_vector, - const std::vector & mapv, - AddedColumns & added_columns, - JoinStuff::JoinUsedFlags & used_flags) -{ - if (added_columns.need_filter) - { - return joinRightColumnsSwitchMultipleDisjuncts(std::forward>(key_getter_vector), mapv, added_columns, used_flags); - } - else - { - return joinRightColumnsSwitchMultipleDisjuncts(std::forward>(key_getter_vector), mapv, added_columns, used_flags); - } -} - -template -size_t switchJoinRightColumns( - const std::vector & mapv, - AddedColumns & added_columns, - HashJoin::Type type, - JoinStuff::JoinUsedFlags & used_flags) -{ - constexpr bool is_asof_join = STRICTNESS == JoinStrictness::Asof; - switch (type) - { - case HashJoin::Type::EMPTY: - { - if constexpr (!is_asof_join) - { - using KeyGetter = KeyGetterEmpty; - std::vector key_getter_vector; - key_getter_vector.emplace_back(); - - using MapTypeVal = typename KeyGetter::MappedType; - std::vector a_map_type_vector; - a_map_type_vector.emplace_back(); - return joinRightColumnsSwitchNullability( - std::move(key_getter_vector), a_map_type_vector, added_columns, used_flags); - } - throw Exception(ErrorCodes::UNSUPPORTED_JOIN_KEYS, "Unsupported JOIN keys. Type: {}", type); - } - #define M(TYPE) \ - case HashJoin::Type::TYPE: \ - { \ - using MapTypeVal = const typename std::remove_reference_t::element_type; \ - using KeyGetter = typename KeyGetterForType::Type; \ - std::vector a_map_type_vector(mapv.size()); \ - std::vector key_getter_vector; \ - for (size_t d = 0; d < added_columns.join_on_keys.size(); ++d) \ - { \ - const auto & join_on_key = added_columns.join_on_keys[d]; \ - a_map_type_vector[d] = mapv[d]->TYPE.get(); \ - key_getter_vector.push_back(std::move(createKeyGetter(join_on_key.key_columns, join_on_key.key_sizes))); \ - } \ - return joinRightColumnsSwitchNullability( \ - std::move(key_getter_vector), a_map_type_vector, added_columns, used_flags); \ - } - APPLY_FOR_JOIN_VARIANTS(M) - #undef M - - default: - throw Exception(ErrorCodes::UNSUPPORTED_JOIN_KEYS, "Unsupported JOIN keys (type: {})", type); - } -} - -/** Since we do not store right key columns, - * this function is used to copy left key columns to right key columns. - * If the user requests some right columns, we just copy left key columns to right, since they are equal. - * Example: SELECT t1.key, t2.key FROM t1 FULL JOIN t2 ON t1.key = t2.key; - * In that case for matched rows in t2.key we will use values from t1.key. - * However, in some cases we might need to adjust the type of column, e.g. t1.key :: LowCardinality(String) and t2.key :: String - * Also, the nullability of the column might be different. - * Returns the right column after with necessary adjustments. 
- */ -ColumnWithTypeAndName copyLeftKeyColumnToRight( - const DataTypePtr & right_key_type, const String & renamed_right_column, const ColumnWithTypeAndName & left_column, const IColumn::Filter * null_map_filter = nullptr) -{ - ColumnWithTypeAndName right_column = left_column; - right_column.name = renamed_right_column; - - if (null_map_filter) - right_column.column = JoinCommon::filterWithBlanks(right_column.column, *null_map_filter); - - bool should_be_nullable = isNullableOrLowCardinalityNullable(right_key_type); - if (null_map_filter) - correctNullabilityInplace(right_column, should_be_nullable, *null_map_filter); - else - correctNullabilityInplace(right_column, should_be_nullable); - - if (!right_column.type->equals(*right_key_type)) - { - right_column.column = castColumnAccurate(right_column, right_key_type); - right_column.type = right_key_type; - } - - right_column.column = right_column.column->convertToFullColumnIfConst(); - return right_column; -} - -/// Cut first num_rows rows from block in place and returns block with remaining rows -Block sliceBlock(Block & block, size_t num_rows) -{ - size_t total_rows = block.rows(); - if (num_rows >= total_rows) - return {}; - size_t remaining_rows = total_rows - num_rows; - Block remaining_block = block.cloneEmpty(); - for (size_t i = 0; i < block.columns(); ++i) - { - auto & col = block.getByPosition(i); - remaining_block.getByPosition(i).column = col.column->cut(num_rows, remaining_rows); - col.column = col.column->cut(0, num_rows); - } - return remaining_block; -} - -} /// nameless - -template -Block HashJoin::joinBlockImpl( - Block & block, - const Block & block_with_columns_to_add, - const std::vector & maps_, - bool is_join_get) const -{ - constexpr JoinFeatures join_features; - - std::vector join_on_keys; - const auto & onexprs = table_join->getClauses(); - for (size_t i = 0; i < onexprs.size(); ++i) - { - const auto & key_names = !is_join_get ? onexprs[i].key_names_left : onexprs[i].key_names_right; - join_on_keys.emplace_back(block, key_names, onexprs[i].condColumnNames().first, key_sizes[i]); - } - size_t existing_columns = block.columns(); - - /** If you use FULL or RIGHT JOIN, then the columns from the "left" table must be materialized. - * Because if they are constants, then in the "not joined" rows, they may have different values - * - default values, which can differ from the values of these constants. - */ - if constexpr (join_features.right || join_features.full) - { - materializeBlockInplace(block); - } - - /** For LEFT/INNER JOIN, the saved blocks do not contain keys. - * For FULL/RIGHT JOIN, the saved blocks contain keys; - * but they will not be used at this stage of joining (and will be in `AdderNonJoined`), and they need to be skipped. 
- * For ASOF, the last column is used as the ASOF column - */ - AddedColumns added_columns( - block, - block_with_columns_to_add, - savedBlockSample(), - *this, - std::move(join_on_keys), - table_join->getMixedJoinExpression(), - join_features.is_asof_join, - is_join_get); - - bool has_required_right_keys = (required_right_keys.columns() != 0); - added_columns.need_filter = join_features.need_filter || has_required_right_keys; - added_columns.max_joined_block_rows = max_joined_block_rows; - if (!added_columns.max_joined_block_rows) - added_columns.max_joined_block_rows = std::numeric_limits::max(); - else - added_columns.reserve(join_features.need_replication); - - size_t num_joined = switchJoinRightColumns(maps_, added_columns, data->type, used_flags); - /// Do not hold memory for join_on_keys anymore - added_columns.join_on_keys.clear(); - Block remaining_block = sliceBlock(block, num_joined); - - added_columns.buildOutput(); - for (size_t i = 0; i < added_columns.size(); ++i) - block.insert(added_columns.moveColumn(i)); - - std::vector right_keys_to_replicate [[maybe_unused]]; - - if constexpr (join_features.need_filter) - { - /// If ANY INNER | RIGHT JOIN - filter all the columns except the new ones. - for (size_t i = 0; i < existing_columns; ++i) - block.safeGetByPosition(i).column = block.safeGetByPosition(i).column->filter(added_columns.filter, -1); - - /// Add join key columns from right block if needed using value from left table because of equality - for (size_t i = 0; i < required_right_keys.columns(); ++i) - { - const auto & right_key = required_right_keys.getByPosition(i); - /// asof column is already in block. - if (join_features.is_asof_join && right_key.name == table_join->getOnlyClause().key_names_right.back()) - continue; - - const auto & left_column = block.getByName(required_right_keys_sources[i]); - const auto & right_col_name = getTableJoin().renamedRightColumnName(right_key.name); - auto right_col = copyLeftKeyColumnToRight(right_key.type, right_col_name, left_column); - block.insert(std::move(right_col)); - } - } - else if (has_required_right_keys) - { - /// Add join key columns from right block if needed. - for (size_t i = 0; i < required_right_keys.columns(); ++i) - { - const auto & right_key = required_right_keys.getByPosition(i); - auto right_col_name = getTableJoin().renamedRightColumnName(right_key.name); - /// asof column is already in block. - if (join_features.is_asof_join && right_key.name == table_join->getOnlyClause().key_names_right.back()) - continue; - - const auto & left_column = block.getByName(required_right_keys_sources[i]); - auto right_col = copyLeftKeyColumnToRight(right_key.type, right_col_name, left_column, &added_columns.filter); - block.insert(std::move(right_col)); - - if constexpr (join_features.need_replication) - right_keys_to_replicate.push_back(block.getPositionByName(right_col_name)); - } - } - - if constexpr (join_features.need_replication) - { - std::unique_ptr & offsets_to_replicate = added_columns.offsets_to_replicate; - - /// If ALL ... JOIN - we replicate all the columns except the new ones. 
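/* Cumulative-offset semantics of replicate(), on a toy example (illustrative):

    IColumn::Offsets offsets = {2, 2, 5};
    ColumnPtr expanded = column->replicate(offsets);
    /// left row 0 -> 2 copies, row 1 -> 0 copies (dropped), row 2 -> 5 - 2 = 3 copies
*/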
- for (size_t i = 0; i < existing_columns; ++i) - { - block.safeGetByPosition(i).column = block.safeGetByPosition(i).column->replicate(*offsets_to_replicate); - } - - /// Replicate additional right keys - for (size_t pos : right_keys_to_replicate) - { - block.safeGetByPosition(pos).column = block.safeGetByPosition(pos).column->replicate(*offsets_to_replicate); - } - } - - return remaining_block; -} - -void HashJoin::joinBlockImplCross(Block & block, ExtraBlockPtr & not_processed) const -{ - size_t start_left_row = 0; - size_t start_right_block = 0; - std::unique_ptr reader = nullptr; - if (not_processed) - { - auto & continuation = static_cast(*not_processed); - start_left_row = continuation.left_position; - start_right_block = continuation.right_block; - reader = std::move(continuation.reader); - not_processed.reset(); - } - - size_t num_existing_columns = block.columns(); - size_t num_columns_to_add = sample_block_with_columns_to_add.columns(); - - ColumnRawPtrs src_left_columns; - MutableColumns dst_columns; - - { - src_left_columns.reserve(num_existing_columns); - dst_columns.reserve(num_existing_columns + num_columns_to_add); - - for (const ColumnWithTypeAndName & left_column : block) - { - src_left_columns.push_back(left_column.column.get()); - dst_columns.emplace_back(src_left_columns.back()->cloneEmpty()); - } - - for (const ColumnWithTypeAndName & right_column : sample_block_with_columns_to_add) - dst_columns.emplace_back(right_column.column->cloneEmpty()); - - for (auto & dst : dst_columns) - dst->reserve(max_joined_block_rows); - } - - size_t rows_left = block.rows(); - size_t rows_added = 0; - for (size_t left_row = start_left_row; left_row < rows_left; ++left_row) - { - size_t block_number = 0; - - auto process_right_block = [&](const Block & block_right) - { - size_t rows_right = block_right.rows(); - rows_added += rows_right; - - for (size_t col_num = 0; col_num < num_existing_columns; ++col_num) - dst_columns[col_num]->insertManyFrom(*src_left_columns[col_num], left_row, rows_right); - - for (size_t col_num = 0; col_num < num_columns_to_add; ++col_num) - { - const IColumn & column_right = *block_right.getByPosition(col_num).column; - dst_columns[num_existing_columns + col_num]->insertRangeFrom(column_right, 0, rows_right); - } - }; - - for (const Block & block_right : data->blocks) - { - ++block_number; - if (block_number < start_right_block) - continue; - - /// The following statement cannot be substituted with `process_right_block(!have_compressed ? block_right : block_right.decompress())` - /// because it will lead to copying of `block_right` even if its branch is taken (because common type of `block_right` and `block_right.decompress()` is `Block`). 
- if (!have_compressed) - process_right_block(block_right); - else - process_right_block(block_right.decompress()); - - if (rows_added > max_joined_block_rows) - { - break; - } - } - - if (tmp_stream && rows_added <= max_joined_block_rows) - { - if (reader == nullptr) - { - tmp_stream->finishWritingAsyncSafe(); - reader = tmp_stream->getReadStream(); - } - while (auto block_right = reader->read()) - { - ++block_number; - process_right_block(block_right); - if (rows_added > max_joined_block_rows) - { - break; - } - } - - /// It means, that reader->read() returned {} - if (rows_added <= max_joined_block_rows) - { - reader.reset(); - } - } - - start_right_block = 0; - - if (rows_added > max_joined_block_rows) - { - not_processed = std::make_shared( - NotProcessedCrossJoin{{block.cloneEmpty()}, left_row, block_number + 1, std::move(reader)}); - not_processed->block.swap(block); - break; - } - } - - for (const ColumnWithTypeAndName & src_column : sample_block_with_columns_to_add) - block.insert(src_column); - - block = block.cloneWithColumns(std::move(dst_columns)); -} - -DataTypePtr HashJoin::joinGetCheckAndGetReturnType(const DataTypes & data_types, const String & column_name, bool or_null) const -{ - size_t num_keys = data_types.size(); - if (right_table_keys.columns() != num_keys) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, - "Number of arguments for function joinGet{} doesn't match: passed, should be equal to {}", - toString(or_null ? "OrNull" : ""), toString(num_keys)); - - for (size_t i = 0; i < num_keys; ++i) - { - const auto & left_type_origin = data_types[i]; - const auto & [c2, right_type_origin, right_name] = right_table_keys.safeGetByPosition(i); - auto left_type = removeNullable(recursiveRemoveLowCardinality(left_type_origin)); - auto right_type = removeNullable(recursiveRemoveLowCardinality(right_type_origin)); - if (!left_type->equals(*right_type)) - throw Exception(ErrorCodes::TYPE_MISMATCH, "Type mismatch in joinGet key {}: " - "found type {}, while the needed type is {}", i, left_type->getName(), right_type->getName()); - } - - if (!sample_block_with_columns_to_add.has(column_name)) - throw Exception(ErrorCodes::NO_SUCH_COLUMN_IN_TABLE, "StorageJoin doesn't contain column {}", column_name); - - auto elem = sample_block_with_columns_to_add.getByName(column_name); - if (or_null && JoinCommon::canBecomeNullable(elem.type)) - elem.type = makeNullable(elem.type); - return elem.type; -} - -/// TODO: return multiple columns as named tuple -/// TODO: return array of values when strictness == JoinStrictness::All -ColumnWithTypeAndName HashJoin::joinGet(const Block & block, const Block & block_with_columns_to_add) const -{ - bool is_valid = (strictness == JoinStrictness::Any || strictness == JoinStrictness::RightAny) - && kind == JoinKind::Left; - if (!is_valid) - throw Exception(ErrorCodes::INCOMPATIBLE_TYPE_OF_JOIN, "joinGet only supports StorageJoin of type Left Any"); - const auto & key_names_right = table_join->getOnlyClause().key_names_right; - - /// Assemble the key block with correct names. 
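/// Illustrative usage (hypothetical join table `db.dict` with key `id`):
///     SELECT joinGet('db.dict', 'value', id_expr)
/// The key arguments arrive here as positional columns; renaming them to the right-side
/// key names lets the lookup reuse the same code paths as a regular LEFT ANY join.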
- Block keys; - for (size_t i = 0; i < block.columns(); ++i) - { - auto key = block.getByPosition(i); - key.name = key_names_right[i]; - keys.insert(std::move(key)); - } - - static_assert(!MapGetter::flagged, - "joinGet are not protected from hash table changes between block processing"); - - std::vector maps_vector; - maps_vector.push_back(&std::get(data->maps[0])); - joinBlockImpl( - keys, block_with_columns_to_add, maps_vector, /* is_join_get = */ true); - return keys.getByPosition(keys.columns() - 1); -} - -void HashJoin::checkTypesOfKeys(const Block & block) const -{ - for (const auto & onexpr : table_join->getClauses()) - { - JoinCommon::checkTypesOfKeys(block, onexpr.key_names_left, right_table_keys, onexpr.key_names_right); - } -} - -void HashJoin::joinBlock(Block & block, ExtraBlockPtr & not_processed) -{ - if (!data) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot join after data has been released"); - - for (const auto & onexpr : table_join->getClauses()) - { - auto cond_column_name = onexpr.condColumnNames(); - JoinCommon::checkTypesOfKeys( - block, onexpr.key_names_left, cond_column_name.first, - right_sample_block, onexpr.key_names_right, cond_column_name.second); - } - - if (kind == JoinKind::Cross) - { - joinBlockImplCross(block, not_processed); - return; - } - - if (kind == JoinKind::Right || kind == JoinKind::Full) - { - materializeBlockInplace(block); - } - - { - std::vectormaps[0])> * > maps_vector; - for (size_t i = 0; i < table_join->getClauses().size(); ++i) - maps_vector.push_back(&data->maps[i]); - - if (joinDispatch(kind, strictness, maps_vector, [&](auto kind_, auto strictness_, auto & maps_vector_) - { - Block remaining_block = joinBlockImpl(block, sample_block_with_columns_to_add, maps_vector_); - if (remaining_block.rows()) - not_processed = std::make_shared(ExtraBlock{std::move(remaining_block)}); - else - not_processed.reset(); - })) - { - /// Joined - } - else - throw Exception(ErrorCodes::LOGICAL_ERROR, "Wrong JOIN combination: {} {}", strictness, kind); - } -} - -HashJoin::~HashJoin() -{ - if (!data) - { - LOG_TEST(log, "{}Join data has been already released", instance_log_id); - return; - } - LOG_TEST( - log, - "{}Join data is being destroyed, {} bytes and {} rows in hash table", - instance_log_id, - getTotalByteCount(), - getTotalRowCount()); -} - -template -struct AdderNonJoined -{ - static void add(const Mapped & mapped, size_t & rows_added, MutableColumns & columns_right) - { - constexpr bool mapped_asof = std::is_same_v; - [[maybe_unused]] constexpr bool mapped_one = std::is_same_v; - - if constexpr (mapped_asof) - { - /// Do nothing - } - else if constexpr (mapped_one) - { - for (size_t j = 0; j < columns_right.size(); ++j) - { - const auto & mapped_column = mapped.block->getByPosition(j).column; - columns_right[j]->insertFrom(*mapped_column, mapped.row_num); - } - - ++rows_added; - } - else - { - for (auto it = mapped.begin(); it.ok(); ++it) - { - for (size_t j = 0; j < columns_right.size(); ++j) - { - const auto & mapped_column = it->block->getByPosition(j).column; - columns_right[j]->insertFrom(*mapped_column, it->row_num); - } - - ++rows_added; - } - } - } -}; - -/// Stream from not joined earlier rows of the right table. 
-/// Based on: -/// - map offsetInternal saved in used_flags for single disjuncts -/// - flags in BlockWithFlags for multiple disjuncts -class NotJoinedHash final : public NotJoinedBlocks::RightColumnsFiller -{ -public: - NotJoinedHash(const HashJoin & parent_, UInt64 max_block_size_, bool flag_per_row_) - : parent(parent_) - , max_block_size(max_block_size_) - , flag_per_row(flag_per_row_) - , current_block_start(0) - { - if (parent.data == nullptr) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot join after data has been released"); - } - - Block getEmptyBlock() override { return parent.savedBlockSample().cloneEmpty(); } - - size_t fillColumns(MutableColumns & columns_right) override - { - size_t rows_added = 0; - if (unlikely(parent.data->type == HashJoin::Type::EMPTY)) - { - rows_added = fillColumnsFromData(parent.data->blocks, columns_right); - } - else - { - auto fill_callback = [&](auto, auto, auto & map) - { - rows_added = fillColumnsFromMap(map, columns_right); - }; - - if (!joinDispatch(parent.kind, parent.strictness, parent.data->maps.front(), fill_callback)) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown JOIN strictness '{}' (must be on of: ANY, ALL, ASOF)", parent.strictness); - } - - if (!flag_per_row) - { - fillNullsFromBlocks(columns_right, rows_added); - } - - return rows_added; - } - -private: - const HashJoin & parent; - UInt64 max_block_size; - bool flag_per_row; - - size_t current_block_start; - - std::any position; - std::optional nulls_position; - std::optional used_position; - - size_t fillColumnsFromData(const BlocksList & blocks, MutableColumns & columns_right) - { - if (!position.has_value()) - position = std::make_any(blocks.begin()); - - auto & block_it = std::any_cast(position); - auto end = blocks.end(); - - size_t rows_added = 0; - for (; block_it != end; ++block_it) - { - size_t rows_from_block = std::min(max_block_size - rows_added, block_it->rows() - current_block_start); - for (size_t j = 0; j < columns_right.size(); ++j) - { - const auto & col = block_it->getByPosition(j).column; - columns_right[j]->insertRangeFrom(*col, current_block_start, rows_from_block); - } - rows_added += rows_from_block; - - if (rows_added >= max_block_size) - { - /// How many rows have been read - current_block_start += rows_from_block; - if (block_it->rows() <= current_block_start) - { - /// current block was fully read - ++block_it; - current_block_start = 0; - } - break; - } - current_block_start = 0; - } - return rows_added; - } - - template - size_t fillColumnsFromMap(const Maps & maps, MutableColumns & columns_keys_and_right) - { - switch (parent.data->type) - { - #define M(TYPE) \ - case HashJoin::Type::TYPE: \ - return fillColumns(*maps.TYPE, columns_keys_and_right); - APPLY_FOR_JOIN_VARIANTS(M) - #undef M - default: - throw Exception(ErrorCodes::UNSUPPORTED_JOIN_KEYS, "Unsupported JOIN keys (type: {})", parent.data->type); - } - } - - template - size_t fillColumns(const Map & map, MutableColumns & columns_keys_and_right) - { - size_t rows_added = 0; - - if (flag_per_row) - { - if (!used_position.has_value()) - used_position = parent.data->blocks.begin(); - - auto end = parent.data->blocks.end(); - - for (auto & it = *used_position; it != end && rows_added < max_block_size; ++it) - { - const Block & mapped_block = *it; - - for (size_t row = 0; row < mapped_block.rows(); ++row) - { - if (!parent.isUsed(&mapped_block, row)) - { - for (size_t colnum = 0; colnum < columns_keys_and_right.size(); ++colnum) - { - 
columns_keys_and_right[colnum]->insertFrom(*mapped_block.getByPosition(colnum).column, row); - } - - ++rows_added; - } - } - } - } - else - { - using Mapped = typename Map::mapped_type; - using Iterator = typename Map::const_iterator; - - - if (!position.has_value()) - position = std::make_any(map.begin()); - - Iterator & it = std::any_cast(position); - auto end = map.end(); - - for (; it != end; ++it) - { - const Mapped & mapped = it->getMapped(); - - size_t offset = map.offsetInternal(it.getPtr()); - if (parent.isUsed(offset)) - continue; - AdderNonJoined::add(mapped, rows_added, columns_keys_and_right); - - if (rows_added >= max_block_size) - { - ++it; - break; - } - } - } - - return rows_added; - } - - void fillNullsFromBlocks(MutableColumns & columns_keys_and_right, size_t & rows_added) - { - if (!nulls_position.has_value()) - nulls_position = parent.data->blocks_nullmaps.begin(); - - auto end = parent.data->blocks_nullmaps.end(); - - for (auto & it = *nulls_position; it != end && rows_added < max_block_size; ++it) - { - const auto * block = it->first; - ConstNullMapPtr nullmap = nullptr; - if (it->second) - nullmap = &assert_cast(*it->second).getData(); - - for (size_t row = 0; row < block->rows(); ++row) - { - if (nullmap && (*nullmap)[row]) - { - for (size_t col = 0; col < columns_keys_and_right.size(); ++col) - columns_keys_and_right[col]->insertFrom(*block->getByPosition(col).column, row); - ++rows_added; - } - } - } - } -}; - -IBlocksStreamPtr HashJoin::getNonJoinedBlocks(const Block & left_sample_block, - const Block & result_sample_block, - UInt64 max_block_size) const -{ - if (!JoinCommon::hasNonJoinedBlocks(*table_join)) - return {}; - size_t left_columns_count = left_sample_block.columns(); - - bool flag_per_row = needUsedFlagsForPerRightTableRow(table_join); - if (!flag_per_row) - { - /// With multiple disjuncts, all keys are in sample_block_with_columns_to_add, so invariant is not held - size_t expected_columns_count = left_columns_count + required_right_keys.columns() + sample_block_with_columns_to_add.columns(); - if (expected_columns_count != result_sample_block.columns()) - { - throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected number of columns in result sample block: {} instead of {} ({} + {} + {})", - result_sample_block.columns(), expected_columns_count, - left_columns_count, required_right_keys.columns(), sample_block_with_columns_to_add.columns()); - } - } - - auto non_joined = std::make_unique(*this, max_block_size, flag_per_row); - return std::make_unique(std::move(non_joined), result_sample_block, left_columns_count, *table_join); -} - -void HashJoin::reuseJoinedData(const HashJoin & join) -{ - data = join.data; - from_storage_join = true; - - bool flag_per_row = needUsedFlagsForPerRightTableRow(table_join); - if (flag_per_row) - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "StorageJoin with ORs is not supported"); - - for (auto & map : data->maps) - { - joinDispatch(kind, strictness, map, [this](auto kind_, auto strictness_, auto & map_) - { - used_flags.reinit(map_.getBufferSizeInCells(data->type) + 1); - }); - } -} - -BlocksList HashJoin::releaseJoinedBlocks(bool restructure) -{ - LOG_TRACE(log, "{}Join data is being released, {} bytes and {} rows in hash table", instance_log_id, getTotalByteCount(), getTotalRowCount()); - - BlocksList right_blocks = std::move(data->blocks); - if (!restructure) - { - data.reset(); - return right_blocks; - } - - data->maps.clear(); - data->blocks_nullmaps.clear(); - - BlocksList restored_blocks; - - /// names to 
positions optimization - std::vector positions; - std::vector is_nullable; - if (!right_blocks.empty()) - { - positions.reserve(right_sample_block.columns()); - const Block & tmp_block = *right_blocks.begin(); - for (const auto & sample_column : right_sample_block) - { - positions.emplace_back(tmp_block.getPositionByName(sample_column.name)); - is_nullable.emplace_back(isNullableOrLowCardinalityNullable(sample_column.type)); - } - } - - for (Block & saved_block : right_blocks) - { - Block restored_block; - for (size_t i = 0; i < positions.size(); ++i) - { - auto & column = saved_block.getByPosition(positions[i]); - correctNullabilityInplace(column, is_nullable[i]); - restored_block.insert(column); - } - restored_blocks.emplace_back(std::move(restored_block)); - } - - data.reset(); - return restored_blocks; -} - -const ColumnWithTypeAndName & HashJoin::rightAsofKeyColumn() const -{ - /// It should be nullable when right side is nullable - return savedBlockSample().getByName(table_join->getOnlyClause().key_names_right.back()); -} - -void HashJoin::validateAdditionalFilterExpression(ExpressionActionsPtr additional_filter_expression) -{ - if (!additional_filter_expression) - return; - - Block expression_sample_block = additional_filter_expression->getSampleBlock(); - - if (expression_sample_block.columns() != 1) - { - throw Exception(ErrorCodes::LOGICAL_ERROR, - "Unexpected expression in JOIN ON section. Expected single column, got '{}'", - expression_sample_block.dumpStructure()); - } - - auto type = removeNullable(expression_sample_block.getByPosition(0).type); - if (!type->equals(*std::make_shared())) - { - throw Exception(ErrorCodes::LOGICAL_ERROR, - "Unexpected expression in JOIN ON section. Expected boolean (UInt8), got '{}'. expression:\n{}", - expression_sample_block.getByPosition(0).type->getName(), - additional_filter_expression->dumpActions()); - } - - bool is_supported = (strictness == JoinStrictness::All) && (isInnerOrLeft(kind) || isRightOrFull(kind)); - if (!is_supported) - { - throw Exception(ErrorCodes::INVALID_JOIN_ON_EXPRESSION, - "Non equi condition '{}' from JOIN ON section is supported only for ALL INNER/LEFT/FULL/RIGHT JOINs", - expression_sample_block.getByPosition(0).name); - } -} - -bool HashJoin::needUsedFlagsForPerRightTableRow(std::shared_ptr table_join_) const -{ - if (!table_join_->oneDisjunct()) - return true; - /// If it'a a all right join with inequal conditions, we need to mark each row - if (table_join_->getMixedJoinExpression() && isRightOrFull(table_join_->kind())) - return true; - return false; -} - -} diff --git a/src/Interpreters/HashJoin/HashJoin.cpp b/src/Interpreters/HashJoin/HashJoin.cpp index c8ef2d41ade..fa8ebd2c0f0 100644 --- a/src/Interpreters/HashJoin/HashJoin.cpp +++ b/src/Interpreters/HashJoin/HashJoin.cpp @@ -536,6 +536,7 @@ bool HashJoin::addBlockToJoin(const Block & source_block_, bool check_limits) || (min_rows_to_compress && getTotalRowCount() >= min_rows_to_compress))) { block_to_save = block_to_save.compress(); + have_compressed = true; } data->blocks_allocated_size += block_to_save.allocatedBytes(); @@ -762,14 +763,18 @@ void HashJoin::joinBlockImplCross(Block & block, ExtraBlockPtr & not_processed) } }; - for (const Block & compressed_block_right : data->blocks) + for (const Block & block_right : data->blocks) { ++block_number; if (block_number < start_right_block) continue; + /// The following statement cannot be substituted with `process_right_block(!have_compressed ? 
block_right : block_right.decompress())` + /// because it will lead to copying of `block_right` even if its branch is taken (because common type of `block_right` and `block_right.decompress()` is `Block`). + if (!have_compressed) + process_right_block(block_right); + else + process_right_block(block_right.decompress()); - auto block_right = compressed_block_right.decompress(); - process_right_block(block_right); if (rows_added > max_joined_block_rows) { break; diff --git a/src/Interpreters/HashJoin/HashJoinMethods.h b/src/Interpreters/HashJoin/HashJoinMethods.h index b1a497c1c1c..0dfafa94efc 100644 --- a/src/Interpreters/HashJoin/HashJoinMethods.h +++ b/src/Interpreters/HashJoin/HashJoinMethods.h @@ -96,7 +96,7 @@ public: case HashJoin::Type::TYPE: \ return insertFromBlockImplTypeCase>::Type>(\ join, *maps.TYPE, rows, key_columns, key_sizes, stored_block, null_map, join_mask, pool, is_inserted); \ - break; + break; APPLY_FOR_JOIN_VARIANTS(M) #undef M @@ -170,20 +170,20 @@ public: { /// If ANY INNER | RIGHT JOIN - filter all the columns except the new ones. for (size_t i = 0; i < existing_columns; ++i) - block.safeGetByPosition(i).column = block.safeGetByPosition(i).column->filter(added_columns.filter, -1); + block.safeGetByPosition(i).column = block.safeGetByPosition(i).column->filter(added_columns.filter, -1); /// Add join key columns from right block if needed using value from left table because of equality for (size_t i = 0; i < join.required_right_keys.columns(); ++i) { - const auto & right_key = join.required_right_keys.getByPosition(i); - /// asof column is already in block. - if (join_features.is_asof_join && right_key.name == join.table_join->getOnlyClause().key_names_right.back()) - continue; + const auto & right_key = join.required_right_keys.getByPosition(i); + /// asof column is already in block. + if (join_features.is_asof_join && right_key.name == join.table_join->getOnlyClause().key_names_right.back()) + continue; - const auto & left_column = block.getByName(join.required_right_keys_sources[i]); - const auto & right_col_name = join.getTableJoin().renamedRightColumnName(right_key.name); - auto right_col = copyLeftKeyColumnToRight(right_key.type, right_col_name, left_column); - block.insert(std::move(right_col)); + const auto & left_column = block.getByName(join.required_right_keys_sources[i]); + const auto & right_col_name = join.getTableJoin().renamedRightColumnName(right_key.name); + auto right_col = copyLeftKeyColumnToRight(right_key.type, right_col_name, left_column); + block.insert(std::move(right_col)); } } else if (has_required_right_keys) @@ -191,18 +191,18 @@ public: /// Add join key columns from right block if needed. for (size_t i = 0; i < join.required_right_keys.columns(); ++i) { - const auto & right_key = join.required_right_keys.getByPosition(i); - auto right_col_name = join.getTableJoin().renamedRightColumnName(right_key.name); - /// asof column is already in block. - if (join_features.is_asof_join && right_key.name == join.table_join->getOnlyClause().key_names_right.back()) - continue; + const auto & right_key = join.required_right_keys.getByPosition(i); + auto right_col_name = join.getTableJoin().renamedRightColumnName(right_key.name); + /// asof column is already in block. 
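/// Aside on the decompress() comment in the HashJoin.cpp hunk above: a minimal,
/// self-contained sketch of the copy pitfall. The `Block` here is illustrative,
/// not ClickHouse's actual Block class; only the conditional-operator semantics matter.

#include <iostream>

struct Block
{
    Block() = default;
    Block(const Block &) { std::cout << "copied\n"; }
    Block decompress() const { return Block{}; }
};

void process(const Block &) {}

int main()
{
    Block b;
    bool have_compressed = false;

    /// The common type of the two branches is a Block prvalue, so the lvalue
    /// branch is materialized through the copy constructor even when it is the
    /// branch actually taken: this prints "copied".
    process(!have_compressed ? b : b.decompress());

    /// An explicit if/else binds the const reference directly to `b`, no copy:
    if (!have_compressed)
        process(b);
    else
        process(b.decompress());
}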
+ if (join_features.is_asof_join && right_key.name == join.table_join->getOnlyClause().key_names_right.back()) + continue; - const auto & left_column = block.getByName(join.required_right_keys_sources[i]); - auto right_col = copyLeftKeyColumnToRight(right_key.type, right_col_name, left_column, &added_columns.filter); - block.insert(std::move(right_col)); + const auto & left_column = block.getByName(join.required_right_keys_sources[i]); + auto right_col = copyLeftKeyColumnToRight(right_key.type, right_col_name, left_column, &added_columns.filter); + block.insert(std::move(right_col)); - if constexpr (join_features.need_replication) - right_keys_to_replicate.push_back(block.getPositionByName(right_col_name)); + if constexpr (join_features.need_replication) + right_keys_to_replicate.push_back(block.getPositionByName(right_col_name)); } } @@ -213,13 +213,13 @@ public: /// If ALL ... JOIN - we replicate all the columns except the new ones. for (size_t i = 0; i < existing_columns; ++i) { - block.safeGetByPosition(i).column = block.safeGetByPosition(i).column->replicate(*offsets_to_replicate); + block.safeGetByPosition(i).column = block.safeGetByPosition(i).column->replicate(*offsets_to_replicate); } /// Replicate additional right keys for (size_t pos : right_keys_to_replicate) { - block.safeGetByPosition(pos).column = block.safeGetByPosition(pos).column->replicate(*offsets_to_replicate); + block.safeGetByPosition(pos).column = block.safeGetByPosition(pos).column->replicate(*offsets_to_replicate); } } @@ -310,7 +310,7 @@ private: } #define M(TYPE) \ case HashJoin::Type::TYPE: \ - { \ + { \ using MapTypeVal = const typename std::remove_reference_t::element_type; \ using KeyGetter = typename KeyGetterForType::Type; \ std::vector a_map_type_vector(mapv.size()); \ @@ -341,12 +341,12 @@ private: { if (added_columns.need_filter) { - return joinRightColumnsSwitchMultipleDisjuncts( + return joinRightColumnsSwitchMultipleDisjuncts( std::forward>(key_getter_vector), mapv, added_columns, used_flags); } else { - return joinRightColumnsSwitchMultipleDisjuncts( + return joinRightColumnsSwitchMultipleDisjuncts( std::forward>(key_getter_vector), mapv, added_columns, used_flags); } } @@ -412,12 +412,12 @@ private: { if constexpr (join_features.need_replication) { - if (unlikely(current_offset >= max_joined_block_rows)) - { + if (unlikely(current_offset >= max_joined_block_rows)) + { added_columns.offsets_to_replicate->resize_assume_reserved(i); added_columns.filter.resize_assume_reserved(i); break; - } + } } bool right_row_found = false; @@ -425,16 +425,16 @@ private: KnownRowsHolder known_rows; for (size_t onexpr_idx = 0; onexpr_idx < added_columns.join_on_keys.size(); ++onexpr_idx) { - const auto & join_keys = added_columns.join_on_keys[onexpr_idx]; - if (join_keys.null_map && (*join_keys.null_map)[i]) - continue; + const auto & join_keys = added_columns.join_on_keys[onexpr_idx]; + if (join_keys.null_map && (*join_keys.null_map)[i]) + continue; - bool row_acceptable = !join_keys.isRowFiltered(i); - using FindResult = typename KeyGetter::FindResult; - auto find_result = row_acceptable ? key_getter_vector[onexpr_idx].findKey(*(mapv[onexpr_idx]), i, pool) : FindResult(); + bool row_acceptable = !join_keys.isRowFiltered(i); + using FindResult = typename KeyGetter::FindResult; + auto find_result = row_acceptable ? 
key_getter_vector[onexpr_idx].findKey(*(mapv[onexpr_idx]), i, pool) : FindResult(); - if (find_result.isFound()) - { + if (find_result.isFound()) + { right_row_found = true; auto & mapped = find_result.getMapped(); if constexpr (join_features.is_asof_join) @@ -507,7 +507,7 @@ private: break; } } - } + } } if (!right_row_found) @@ -821,9 +821,7 @@ private: { // Add a check for current_added_rows to avoid run the filter expression on too small size batch. if (total_added_rows >= max_joined_block_rows || current_added_rows < 1024) - { exceeded_max_block_rows = true; - } } } From df8341c447ecf1775e27dcdd6ea09829d9e35880 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Fri, 21 Jun 2024 18:30:45 +0200 Subject: [PATCH 034/140] Try to improve low number itoa --- base/base/itoa.cpp | 75 ++++++++++++++++++++++++++-------------------- 1 file changed, 43 insertions(+), 32 deletions(-) diff --git a/base/base/itoa.cpp b/base/base/itoa.cpp index e7250764704..0997daebbf6 100644 --- a/base/base/itoa.cpp +++ b/base/base/itoa.cpp @@ -3,6 +3,34 @@ #include #include +namespace +{ +ALWAYS_INLINE inline char * outOneDigit(char * p, uint8_t value) +{ + *p = '0' + value; + return p + 1; +} + +// Using a lookup table to convert binary numbers from 0 to 99 +// into ascii characters as described by Andrei Alexandrescu in +// https://www.facebook.com/notes/facebook-engineering/three-optimization-tips-for-c/10151361643253920/ +const char digits[201] = "00010203040506070809" + "10111213141516171819" + "20212223242526272829" + "30313233343536373839" + "40414243444546474849" + "50515253545556575859" + "60616263646566676869" + "70717273747576777879" + "80818283848586878889" + "90919293949596979899"; +ALWAYS_INLINE inline char * outTwoDigits(char * p, uint8_t value) +{ + memcpy(p, &digits[value * 2], 2); + p += 2; + return p; +} + namespace jeaiii { /* @@ -84,43 +112,48 @@ template inline ALWAYS_INLINE char * to_text_from_integer(char * b, T i) { constexpr auto q = sizeof(T); - using U = cond>>; + using U = cond>>; // convert bool to int before test with unary + to silence warning if T happens to be bool U const n = +i < 0 ? *b++ = '-', U(0) - U(i) : U(i); - if (n < UInt32(1e2)) + if (n < U(1e2)) { - *reinterpret_cast(b) = digits.fd[n]; - return n < 10 ? b + 1 : b + 2; + return n < 10 ? 
outOneDigit(b, n) : outTwoDigits(b, n); } if (n < UInt32(1e6)) { - if (n < UInt32(1e4)) + if (sizeof(U) == 1 || n < U(1e4)) { auto f0 = UInt32(10 * (1 << 24) / 1e3 + 1) * n; *reinterpret_cast(b) = digits.fd[f0 >> 24]; - b -= n < UInt32(1e3); + if constexpr (sizeof(U) == 1) + b -= 1; + else + b -= n < U(1e3); auto f2 = (f0 & mask24) * 100; *reinterpret_cast(b + 2) = digits.dd[f2 >> 24]; return b + 4; } auto f0 = UInt64(10 * (1ull << 32ull) / 1e5 + 1) * n; *reinterpret_cast(b) = digits.fd[f0 >> 32]; - b -= n < UInt32(1e5); + if constexpr (sizeof(U) == 2) + b -= 1; + else + b -= n < U(1e5); auto f2 = (f0 & mask32) * 100; *reinterpret_cast(b + 2) = digits.dd[f2 >> 32]; auto f4 = (f2 & mask32) * 100; *reinterpret_cast(b + 4) = digits.dd[f4 >> 32]; return b + 6; } - if (n < UInt64(1ull << 32ull)) + if (sizeof(U) == 4 || n < UInt64(1ull << 32ull)) { - if (n < UInt32(1e8)) + if (n < U(1e8)) { auto f0 = UInt64(10 * (1ull << 48ull) / 1e7 + 1) * n >> 16; *reinterpret_cast(b) = digits.fd[f0 >> 32]; - b -= n < UInt32(1e7); + b -= n < U(1e7); auto f2 = (f0 & mask32) * 100; *reinterpret_cast(b + 2) = digits.dd[f2 >> 32]; auto f4 = (f2 & mask32) * 100; @@ -248,28 +281,6 @@ inline ALWAYS_INLINE char * to_text_from_integer(char * b, T i) } } -namespace -{ -// Using a lookup table to convert binary numbers from 0 to 99 -// into ascii characters as described by Andrei Alexandrescu in -// https://www.facebook.com/notes/facebook-engineering/three-optimization-tips-for-c/10151361643253920/ -const char digits[201] = "00010203040506070809" - "10111213141516171819" - "20212223242526272829" - "30313233343536373839" - "40414243444546474849" - "50515253545556575859" - "60616263646566676869" - "70717273747576777879" - "80818283848586878889" - "90919293949596979899"; -ALWAYS_INLINE inline char * outTwoDigits(char * p, uint8_t value) -{ - memcpy(p, &digits[value * 2], 2); - p += 2; - return p; -} - const uint64_t max_multiple_of_hundred_that_fits_in_64_bits = 1'00'00'00'00'00'00'00'00'00ull; const int max_multiple_of_hundred_blocks = 9; static_assert(max_multiple_of_hundred_that_fits_in_64_bits % 100 == 0); From 636f2506f01e040c450d77bca29dfa8811f00575 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Mon, 24 Jun 2024 11:35:29 +0200 Subject: [PATCH 035/140] Silence tidy --- base/base/itoa.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/base/base/itoa.cpp b/base/base/itoa.cpp index 0997daebbf6..c17a2bfd999 100644 --- a/base/base/itoa.cpp +++ b/base/base/itoa.cpp @@ -59,8 +59,8 @@ namespace jeaiii struct pair { char dd[2]; - constexpr pair(char c) : dd{c, '\0'} { } - constexpr pair(int n) : dd{"0123456789"[n / 10], "0123456789"[n % 10]} { } + constexpr pair(char c) : dd{c, '\0'} { } /// NOLINT(google-explicit-constructor) + constexpr pair(int n) : dd{"0123456789"[n / 10], "0123456789"[n % 10]} { } /// NOLINT(google-explicit-constructor) }; constexpr struct From 98c9928f5bba4577912bba3e09a5a0c1463ecb7a Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 25 Jun 2024 00:54:59 +0200 Subject: [PATCH 036/140] Speed up loading of plain_rewritable disks; fix many bugs in ObjectStorage --- src/Disks/IDisk.cpp | 2 +- ...torageFromPlainRewritableObjectStorage.cpp | 70 +++++++++++++------ ...aStorageFromPlainRewritableObjectStorage.h | 1 + .../ObjectStorages/ObjectStorageIterator.h | 19 +++++ .../ObjectStorageIteratorAsync.cpp | 18 ++--- .../ObjectStorageIteratorAsync.h | 3 +- .../ObjectStorages/S3/S3ObjectStorage.cpp | 2 + src/Disks/ObjectStorages/S3/S3ObjectStorage.h | 2 +- 
src/Storages/MergeTree/MergeTreeData.cpp | 5 +- src/Storages/MergeTree/MergeTreeData.h | 2 +- 10 files changed, 89 insertions(+), 35 deletions(-) diff --git a/src/Disks/IDisk.cpp b/src/Disks/IDisk.cpp index 5f0ca850b40..4bbefad5290 100644 --- a/src/Disks/IDisk.cpp +++ b/src/Disks/IDisk.cpp @@ -186,7 +186,7 @@ void IDisk::checkAccess() DB::UUID server_uuid = DB::ServerUUID::get(); if (server_uuid == DB::UUIDHelpers::Nil) throw Exception(ErrorCodes::LOGICAL_ERROR, "Server UUID is not initialized"); - const String path = fmt::format("clickhouse_access_check_{}", DB::toString(server_uuid)); + const String path = fmt::format("clickhouse_access_check_{}", toString(server_uuid)); checkAccessImpl(path); } diff --git a/src/Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.cpp b/src/Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.cpp index cc77ca5364b..bf9d6608309 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.cpp +++ b/src/Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.cpp @@ -1,6 +1,8 @@ #include +#include #include +#include #include #include #include "CommonPathPrefixKeyGenerator.h" @@ -22,34 +24,62 @@ MetadataStorageFromPlainObjectStorage::PathMap loadPathPrefixMap(const std::stri { MetadataStorageFromPlainObjectStorage::PathMap result; - RelativePathsWithMetadata files; - object_storage->listObjects(root, files, 0); - for (const auto & file : files) + ThreadPool & pool = getIOThreadPool().get(); + ThreadPoolCallbackRunnerLocal runner(pool, "PlainRWMetaLoad"); + std::mutex mutex; + + LoggerPtr log = getLogger("MetadataStorageFromPlainObjectStorage"); + + ReadSettings settings; + settings.enable_filesystem_cache = false; + settings.remote_fs_method = RemoteFSReadMethod::read; + settings.remote_fs_buffer_size = 1024; /// These files are small. + + LOG_DEBUG(log, "Loading metadata"); + size_t num_files = 0; + for (auto iterator = object_storage->iterate(root, 0); iterator->isValid(); iterator->next()) { - auto remote_path = std::filesystem::path(file->relative_path); + ++num_files; + auto file = iterator->current(); + String path = file->getPath(); + auto remote_path = std::filesystem::path(path); if (remote_path.filename() != PREFIX_PATH_FILE_NAME) continue; - StoredObject object{file->relative_path}; + runner([remote_path, path, &object_storage, &result, &mutex, &log, &settings] + { + setThreadName("PlainRWMetaLoad"); - auto read_buf = object_storage->readObject(object); - String local_path; - readStringUntilEOF(local_path, *read_buf); + StoredObject object{path}; - chassert(remote_path.has_parent_path()); - auto res = result.emplace(local_path, remote_path.parent_path()); + /// TODO: If an object was removed just now, skip it. + auto read_buf = object_storage->readObject(object, settings); + String local_path; + readStringUntilEOF(local_path, *read_buf); - /// This can happen if table replication is enabled, then the same local path is written - /// in `prefix.path` of each replica. - /// TODO: should replicated tables (e.g., RMT) be explicitly disallowed? 
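/// The parallel-load pattern used in loadPathPrefixMap above, reduced to a sketch.
/// The runner calls are the same ones the hunk uses (ThreadPoolCallbackRunnerLocal<void>,
/// with the template argument written out); the include path, readSmallFile() and
/// `paths` are assumptions for illustration, not the actual implementation.

#include <Common/threadPoolCallbackRunner.h>   /// assumed location of the runner
#include <map>
#include <mutex>
#include <vector>

void loadAllInParallel(const std::vector<String> & paths, std::map<String, String> & result)
{
    ThreadPool & pool = getIOThreadPool().get();
    ThreadPoolCallbackRunnerLocal<void> runner(pool, "PlainRWMetaLoad");
    std::mutex mutex;

    for (const auto & path : paths)
    {
        runner([path, &result, &mutex]
        {
            String value = readSmallFile(path);   /// hypothetical blocking read of one small file
            std::lock_guard lock(mutex);          /// the result map is shared between tasks
            result.emplace(value, path);
        });
    }

    /// Waits for every scheduled task and rethrows the first captured exception:
    runner.waitForAllToFinishAndRethrowFirstError();
}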
-            if (!res.second)
-                LOG_WARNING(
-                    getLogger("MetadataStorageFromPlainObjectStorage"),
-                    "The local path '{}' is already mapped to a remote path '{}', ignoring: '{}'",
-                    local_path,
-                    res.first->second,
-                    remote_path.parent_path().string());
+            chassert(remote_path.has_parent_path());
+            std::pair<MetadataStorageFromPlainObjectStorage::PathMap::iterator, bool> res;
+            {
+                std::lock_guard lock(mutex);
+                res = result.emplace(local_path, remote_path.parent_path());
+            }
+
+            /// This can happen if table replication is enabled, then the same local path is written
+            /// in `prefix.path` of each replica.
+            /// TODO: should replicated tables (e.g., RMT) be explicitly disallowed?
+            if (!res.second)
+                LOG_WARNING(
+                    log,
+                    "The local path '{}' is already mapped to a remote path '{}', ignoring: '{}'",
+                    local_path,
+                    res.first->second,
+                    remote_path.parent_path().string());
+        });
     }
+
+    runner.waitForAllToFinishAndRethrowFirstError();
+    LOG_DEBUG(log, "Loaded metadata for {} files, found {} directories", num_files, result.size());
+
     auto metric = object_storage->getMetadataStorageMetrics().directory_map_size;
     CurrentMetrics::add(metric, result.size());
     return result;
diff --git a/src/Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.h b/src/Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.h
index 661968d7044..a5394b9428d 100644
--- a/src/Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.h
+++ b/src/Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.h
@@ -4,6 +4,7 @@
 
 #include 
 
+
 namespace DB
 {
 
diff --git a/src/Disks/ObjectStorages/ObjectStorageIterator.h b/src/Disks/ObjectStorages/ObjectStorageIterator.h
index 26c3c690ba5..026b9c4fcd7 100644
--- a/src/Disks/ObjectStorages/ObjectStorageIterator.h
+++ b/src/Disks/ObjectStorages/ObjectStorageIterator.h
@@ -9,12 +9,30 @@ namespace DB
 class IObjectStorageIterator
 {
 public:
+    /// Moves the iterator to the next element. If the iterator is not valid, the behavior is undefined.
     virtual void next() = 0;
+
+    /// Skips all the remaining elements in the current batch (if any),
+    /// and moves the iterator to the first element of the next batch,
+    /// or, if there are no more batches, the iterator becomes invalid.
+    /// If the iterator is not valid, the behavior is undefined.
     virtual void nextBatch() = 0;
+
+    /// Check if the iterator is valid, which means the `current` method can be called.
     virtual bool isValid() = 0;
+
+    /// Return the current element.
     virtual RelativePathWithMetadataPtr current() = 0;
+
+    /// Return the current batch of elements.
+    /// It is unspecified how batches are formed.
+    /// But this method can be used for more efficient processing.
     virtual RelativePathsWithMetadata currentBatch() = 0;
+
+    /// This will initiate prefetching the next batch in the background, so it can be obtained faster when needed.
     virtual std::optional<RelativePathsWithMetadata> getCurrentBatchAndScheduleNext() = 0;
+
+    /// Returns the number of elements in the batches that were fetched so far.
     virtual size_t getAccumulatedSize() const = 0;
 
     virtual ~IObjectStorageIterator() = default;
@@ -25,6 +43,7 @@
 using ObjectStorageIteratorPtr = std::shared_ptr<IObjectStorageIterator>;
 
 class ObjectStorageIteratorFromList : public IObjectStorageIterator
 {
 public:
+    /// Everything is represented by just a single batch.
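    /// Consuming any IObjectStorageIterator, as a sketch. These are the same
    /// calls the loading loop in MetadataStorageFromPlainRewritableObjectStorage.cpp
    /// above uses; processPath() is a hypothetical consumer:
    ///
    ///     for (auto it = object_storage->iterate(root, /*max_keys=*/ 0); it->isValid(); it->next())
    ///     {
    ///         RelativePathWithMetadataPtr file = it->current();
    ///         processPath(file->getPath());
    ///     }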
explicit ObjectStorageIteratorFromList(RelativePathsWithMetadata && batch_) : batch(std::move(batch_)) , batch_iterator(batch.begin()) {} diff --git a/src/Disks/ObjectStorages/ObjectStorageIteratorAsync.cpp b/src/Disks/ObjectStorages/ObjectStorageIteratorAsync.cpp index a249789df4b..ff94e878586 100644 --- a/src/Disks/ObjectStorages/ObjectStorageIteratorAsync.cpp +++ b/src/Disks/ObjectStorages/ObjectStorageIteratorAsync.cpp @@ -37,10 +37,11 @@ void IObjectStorageIteratorAsync::nextBatch() { std::lock_guard lock(mutex); - if (is_finished) + if (!has_next_batch) { current_batch.clear(); current_batch_iterator = current_batch.begin(); + is_finished = true; return; } @@ -60,14 +61,13 @@ void IObjectStorageIteratorAsync::nextBatch() accumulated_size.fetch_add(current_batch.size(), std::memory_order_relaxed); - if (result.has_next) + has_next_batch = result.has_next; + if (has_next_batch) outcome_future = scheduleBatch(); - else - is_finished = true; } catch (...) { - is_finished = true; + has_next_batch = false; throw; } } @@ -76,10 +76,12 @@ void IObjectStorageIteratorAsync::next() { std::lock_guard lock(mutex); + if (is_finished) + return; + + ++current_batch_iterator; if (current_batch_iterator == current_batch.end()) nextBatch(); - else - ++current_batch_iterator; } std::future IObjectStorageIteratorAsync::scheduleBatch() @@ -99,7 +101,7 @@ bool IObjectStorageIteratorAsync::isValid() if (!is_initialized) nextBatch(); - return current_batch_iterator != current_batch.end(); + return !is_finished; } RelativePathWithMetadataPtr IObjectStorageIteratorAsync::current() diff --git a/src/Disks/ObjectStorages/ObjectStorageIteratorAsync.h b/src/Disks/ObjectStorages/ObjectStorageIteratorAsync.h index cb4818d01ae..01371415124 100644 --- a/src/Disks/ObjectStorages/ObjectStorageIteratorAsync.h +++ b/src/Disks/ObjectStorages/ObjectStorageIteratorAsync.h @@ -35,7 +35,7 @@ public: void deactivate(); protected: - + /// This method fetches the next batch, and returns true if there are more batches after it. virtual bool getBatchAndCheckNext(RelativePathsWithMetadata & batch) = 0; struct BatchAndHasNext @@ -48,6 +48,7 @@ protected: bool is_initialized{false}; bool is_finished{false}; + bool has_next_batch{true}; bool deactivated{false}; mutable std::recursive_mutex mutex; diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp index 63e11dcd8c8..64af5df3634 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp @@ -293,6 +293,8 @@ std::unique_ptr S3ObjectStorage::writeObject( /// NOLIN ObjectStorageIteratorPtr S3ObjectStorage::iterate(const std::string & path_prefix, size_t max_keys) const { auto settings_ptr = s3_settings.get(); + if (!max_keys) + max_keys = settings_ptr->list_object_keys_size; return std::make_shared(uri.bucket, path_prefix, client.get(), max_keys); } diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.h b/src/Disks/ObjectStorages/S3/S3ObjectStorage.h index 7446a1f6fc8..b2f2cfbecaf 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.h +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.h @@ -67,7 +67,7 @@ private: } public: - template + template explicit S3ObjectStorage(std::unique_ptr && client_, Args && ...args) : S3ObjectStorage("S3ObjectStorage", std::move(client_), std::forward(args)...) 
{ diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 2e0ea4cdbcd..0d72da7d264 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -283,13 +283,12 @@ void MergeTreeData::initializeDirectoriesAndFormatVersion(const std::string & re } } - - // When data path or file not exists, ignore the format_version check + /// When data path or file not exists, ignore the format_version check if (!attach || !read_format_version) { format_version = min_format_version; - // try to write to first non-readonly disk + /// Try to write to first non-readonly disk for (const auto & disk : getStoragePolicy()->getDisks()) { if (disk->isBroken()) diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index c6f736a4afd..ba227a00506 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -1143,7 +1143,7 @@ protected: struct TagByInfo{}; struct TagByStateAndInfo{}; - void initializeDirectoriesAndFormatVersion(const std::string & relative_data_path_, bool attach, const std::string & date_column_name, bool need_create_directories=true); + void initializeDirectoriesAndFormatVersion(const std::string & relative_data_path_, bool attach, const std::string & date_column_name, bool need_create_directories = true); static const MergeTreePartInfo & dataPartPtrToInfo(const DataPartPtr & part) { From 888d6a29b8ef5b2b076713021385fe8b2370b423 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 25 Jun 2024 01:16:42 +0200 Subject: [PATCH 037/140] Support filesystem that is modified under our feet --- ...torageFromPlainRewritableObjectStorage.cpp | 20 +++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/src/Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.cpp b/src/Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.cpp index bf9d6608309..c2b3bd8f4f0 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.cpp +++ b/src/Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.cpp @@ -3,10 +3,12 @@ #include #include +#include #include #include #include "CommonPathPrefixKeyGenerator.h" + namespace DB { @@ -51,11 +53,21 @@ MetadataStorageFromPlainObjectStorage::PathMap loadPathPrefixMap(const std::stri setThreadName("PlainRWMetaLoad"); StoredObject object{path}; - - /// TODO: If an object was removed just now, skip it. - auto read_buf = object_storage->readObject(object, settings); String local_path; - readStringUntilEOF(local_path, *read_buf); + + try + { + auto read_buf = object_storage->readObject(object, settings); + readStringUntilEOF(local_path, *read_buf); + } + catch (const S3Exception & e) + { + /// It is ok if a directory was removed just now. + /// We support attaching a filesystem that is concurrently modified by someone else. 
+ if (e.getS3ErrorCode() == Aws::S3::S3Errors::NO_SUCH_KEY) + return; + throw; + } chassert(remote_path.has_parent_path()); std::pair res; From f7b0cecbc13530896d16da9a7c3d0fc94a9acf8f Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 25 Jun 2024 01:37:14 +0200 Subject: [PATCH 038/140] Do not load inactive parts on readonly disks --- programs/local/LocalServer.cpp | 1 - src/Storages/MergeTree/MergeTreeData.cpp | 7 +++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index cb1c35743b2..00fb79e1af9 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -145,7 +145,6 @@ void LocalServer::initialize(Poco::Util::Application & self) config().getUInt("max_io_thread_pool_free_size", 0), config().getUInt("io_thread_pool_queue_size", 10000)); - const size_t active_parts_loading_threads = config().getUInt("max_active_parts_loading_thread_pool_size", 64); getActivePartsLoadingThreadPool().initialize( active_parts_loading_threads, diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 0d72da7d264..d6d1acb3585 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -1758,11 +1758,14 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks, std::optional runner(getActivePartsLoadingThreadPool().get(), "ActiveParts"); + bool all_disks_are_readonly = true; for (size_t i = 0; i < disks.size(); ++i) { const auto & disk_ptr = disks[i]; if (disk_ptr->isBroken()) continue; + if (!disk_ptr->isReadOnly()) + all_disks_are_readonly = false; auto & disk_parts = parts_to_load_by_disk[i]; auto & unexpected_disk_parts = unexpected_parts_to_load_by_disk[i]; @@ -1915,7 +1918,6 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks, std::optionalrenameToDetached("broken-on-start"); /// detached parts must not have '_' in prefixes @@ -1960,7 +1962,8 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks, std::optional Date: Tue, 25 Jun 2024 01:45:19 +0200 Subject: [PATCH 039/140] Fix fast test --- .../MetadataStorageFromPlainRewritableObjectStorage.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.cpp b/src/Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.cpp index c2b3bd8f4f0..7718fba9c28 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.cpp +++ b/src/Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.cpp @@ -60,6 +60,7 @@ MetadataStorageFromPlainObjectStorage::PathMap loadPathPrefixMap(const std::stri auto read_buf = object_storage->readObject(object, settings); readStringUntilEOF(local_path, *read_buf); } +#if USE_AWS_S3 catch (const S3Exception & e) { /// It is ok if a directory was removed just now. @@ -68,6 +69,11 @@ MetadataStorageFromPlainObjectStorage::PathMap loadPathPrefixMap(const std::stri return; throw; } +#endif + catch (...) 
+ { + throw; + } chassert(remote_path.has_parent_path()); std::pair res; From 20b36153e92e089465df1c4f77befd4d0cfdd3fd Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 25 Jun 2024 01:50:36 +0200 Subject: [PATCH 040/140] Fix typo --- .../ObjectStorages/MetadataStorageFromPlainObjectStorage.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.h b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.h index f290a762205..66da0f2431e 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.h +++ b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.h @@ -22,8 +22,7 @@ using UnlinkMetadataFileOperationOutcomePtr = std::shared_ptr Date: Tue, 25 Jun 2024 02:13:47 +0200 Subject: [PATCH 041/140] Fix a typo --- src/Disks/IDisk.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Disks/IDisk.h b/src/Disks/IDisk.h index 658acb01c74..920adf06b1e 100644 --- a/src/Disks/IDisk.h +++ b/src/Disks/IDisk.h @@ -427,7 +427,7 @@ public: /// Device: 10301h/66305d Inode: 3109907 Links: 1 /// Why we have always zero by default? Because normal filesystem /// manages hardlinks by itself. So you can always remove hardlink and all - /// other alive harlinks will not be removed. + /// other alive hardlinks will not be removed. virtual UInt32 getRefCount(const String &) const { return 0; } /// Revision is an incremental counter of disk operation. From b694abd5c9910bf5199141d4afef658a7f35a0c6 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Wed, 26 Jun 2024 14:18:14 +0000 Subject: [PATCH 042/140] do not optimize with group_by_use_nulls --- .../Passes/FunctionToSubcolumnsPass.cpp | 20 +++++++++++++------ ...71_function_to_subcolumns_fuzzer.reference | 6 ++++++ .../03171_function_to_subcolumns_fuzzer.sql | 10 ++++++++++ 3 files changed, 30 insertions(+), 6 deletions(-) diff --git a/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp b/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp index bc2028e1b43..90051779a26 100644 --- a/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp +++ b/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp @@ -273,18 +273,26 @@ public: if (const auto * join_node = node->as()) { - has_join_use_nulls |= getContext()->getSettingsRef().join_use_nulls; + can_wrap_result_columns_with_nullable |= getContext()->getSettingsRef().join_use_nulls; + return; + } + + if (const auto * query_node = node->as()) + { + if (query_node->isGroupByWithCube() || query_node->isGroupByWithRollup() || query_node->isGroupByWithGroupingSets()) + can_wrap_result_columns_with_nullable |= getContext()->getSettingsRef().group_by_use_nulls; return; } } std::unordered_set getIdentifiersToOptimize() const { - if (has_join_use_nulls) + if (can_wrap_result_columns_with_nullable) { /// Do not optimize if we have JOIN with setting join_use_null. + /// Do not optimize if we have GROUP BY WITH ROLLUP/CUBE/GROUPING SETS with setting group_by_use_nulls. /// It may change the behaviour if subcolumn can be converted - /// to nullable while the original column cannot. + /// to Nullable while the original column cannot (e.g. for Array type). 
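        /// Concretely, the subcolumn's type may be wrappable in Nullable while the
        /// column's type is not (a sketch; assumes IDataType::canBeInsideNullable()):
        ///
        ///     DataTypePtr array_type = std::make_shared<DataTypeArray>(std::make_shared<DataTypeUInt64>());
        ///     DataTypePtr size0_type = std::make_shared<DataTypeUInt64>();
        ///     chassert(!array_type->canBeInsideNullable()); /// Nullable(Array(UInt64)) does not exist
        ///     chassert(size0_type->canBeInsideNullable());  /// Nullable(UInt64) is fine
        ///
        /// so rewriting length(arr) -> arr.size0 could change whether the result
        /// becomes NULL in ROLLUP/CUBE/GROUPING SETS totals.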
return {}; } @@ -323,7 +331,7 @@ private: std::unordered_map optimized_identifiers_count; NameSet processed_tables; - bool has_join_use_nulls = false; + bool can_wrap_result_columns_with_nullable = false; void enterImpl(const TableNode & table_node) { @@ -342,9 +350,9 @@ private: const auto & metadata_snapshot = table_node.getStorageSnapshot()->metadata; const auto & primary_key_columns = metadata_snapshot->getColumnsRequiredForPrimaryKey(); - add_key_columns(primary_key_columns); - const auto & partition_key_columns = metadata_snapshot->getColumnsRequiredForPartitionKey(); + + add_key_columns(primary_key_columns); add_key_columns(partition_key_columns); for (const auto & index : metadata_snapshot->getSecondaryIndices()) diff --git a/tests/queries/0_stateless/03171_function_to_subcolumns_fuzzer.reference b/tests/queries/0_stateless/03171_function_to_subcolumns_fuzzer.reference index be47c4ab571..1fc6683620c 100644 --- a/tests/queries/0_stateless/03171_function_to_subcolumns_fuzzer.reference +++ b/tests/queries/0_stateless/03171_function_to_subcolumns_fuzzer.reference @@ -1,3 +1,9 @@ 1 2 1 3 0 +0 450 +1 460 +2 470 +3 480 +4 490 +\N 4950 diff --git a/tests/queries/0_stateless/03171_function_to_subcolumns_fuzzer.sql b/tests/queries/0_stateless/03171_function_to_subcolumns_fuzzer.sql index 587288bbfdf..f10019a78dd 100644 --- a/tests/queries/0_stateless/03171_function_to_subcolumns_fuzzer.sql +++ b/tests/queries/0_stateless/03171_function_to_subcolumns_fuzzer.sql @@ -37,3 +37,13 @@ FULL OUTER JOIN WHERE empty(arr); DROP TABLE t_func_to_subcolumns_join; + +DROP TABLE IF EXISTS t_func_to_subcolumns_use_nulls; + +CREATE TABLE t_func_to_subcolumns_use_nulls (arr Array(UInt64), v UInt64) ENGINE = MergeTree ORDER BY tuple(); + +INSERT INTO t_func_to_subcolumns_use_nulls SELECT range(number % 10), number FROM numbers(100); + +SELECT length(arr) AS n, sum(v) FROM t_func_to_subcolumns_use_nulls GROUP BY n WITH ROLLUP HAVING n <= 4 OR isNull(n) ORDER BY n SETTINGS group_by_use_nulls = 1; + +DROP TABLE t_func_to_subcolumns_use_nulls; From c8bca3135de71d4adafe74c415adfc14683ad7f1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rodolphe=20Dug=C3=A9=20de=20Bernonville?= Date: Wed, 26 Jun 2024 14:51:21 +0200 Subject: [PATCH 043/140] fix odbc and nullable fields --- programs/odbc-bridge/ODBCSource.cpp | 13 ++++- .../integration/test_odbc_interaction/test.py | 55 +++++++++++++++++++ 2 files changed, 66 insertions(+), 2 deletions(-) diff --git a/programs/odbc-bridge/ODBCSource.cpp b/programs/odbc-bridge/ODBCSource.cpp index 940970f36ab..41a9813ce50 100644 --- a/programs/odbc-bridge/ODBCSource.cpp +++ b/programs/odbc-bridge/ODBCSource.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include @@ -47,9 +48,17 @@ Chunk ODBCSource::generate() for (int idx = 0; idx < result.columns(); ++idx) { const auto & sample = description.sample_block.getByPosition(idx); - if (!result.is_null(idx)) - insertValue(*columns[idx], removeNullable(sample.type), description.types[idx].first, result, idx); + { + if (columns[idx]->isNullable()) + { + ColumnNullable & column_nullable = assert_cast(*columns[idx]); + insertValue(column_nullable.getNestedColumn(), removeNullable(sample.type), description.types[idx].first, result, idx); + column_nullable.getNullMapData().emplace_back(0); + } + else + insertValue(*columns[idx], removeNullable(sample.type), description.types[idx].first, result, idx); + } else insertDefaultValue(*columns[idx], *sample.column); } diff --git a/tests/integration/test_odbc_interaction/test.py 
b/tests/integration/test_odbc_interaction/test.py index 06cbe70f7c6..0d0d7a0afb1 100644 --- a/tests/integration/test_odbc_interaction/test.py +++ b/tests/integration/test_odbc_interaction/test.py @@ -40,6 +40,16 @@ create_table_sql_template = """ PRIMARY KEY (`id`)) ENGINE=InnoDB; """ +create_table_sql_nullable_template = """ + CREATE TABLE `clickhouse`.`{}` ( + `id` integer not null, + `col1` integer, + `col2` decimal(15,10), + `col3` varchar(32), + `col4` datetime + ) + """ + def skip_test_msan(instance): if instance.is_built_with_memory_sanitizer(): @@ -77,6 +87,11 @@ def create_mysql_db(conn, name): cursor.execute("CREATE DATABASE {} DEFAULT CHARACTER SET 'utf8'".format(name)) +def create_mysql_nullable_table(conn, table_name): + with conn.cursor() as cursor: + cursor.execute(create_table_sql_nullable_template.format(table_name)) + + def create_mysql_table(conn, table_name): with conn.cursor() as cursor: cursor.execute(create_table_sql_template.format(table_name)) @@ -192,6 +207,46 @@ def started_cluster(): cluster.shutdown() +def test_mysql_odbc_select_nullable(started_cluster): + skip_test_msan(node1) + mysql_setup = node1.odbc_drivers["MySQL"] + + table_name = "test_insert_nullable_select" + conn = get_mysql_conn() + create_mysql_nullable_table(conn, table_name) + with conn.cursor() as cursor: + cursor.execute( + "INSERT INTO clickhouse.{} VALUES(1, 1, 1.23456, 'data1', '2010-01-01 00:00:00');".format( + table_name + ) + ) + cursor.execute( + "INSERT INTO clickhouse.{} VALUES(2, NULL, NULL, NULL, NULL);".format( + table_name + ) + ) + conn.commit() + + node1.query( + """ + CREATE TABLE {}(id UInt32, col1 Nullable(UInt32), col2 Nullable(Decimal(15, 10)), col3 Nullable(String), col4 Nullable(DateTime)) ENGINE = ODBC('DSN={}', 'clickhouse', '{}'); + """.format( + table_name, mysql_setup["DSN"], table_name + ) + ) + + assert ( + node1.query( + "SELECT id, col1, col2, col3, col4 from {} order by id asc".format( + table_name + ) + ) + == "1\t1\t1.23456\tdata1\t2010-01-01 00:00:00\n2\t\\N\t\\N\t\\N\t\\N\n" + ) + drop_mysql_table(conn, table_name) + conn.close() + + def test_mysql_simple_select_works(started_cluster): skip_test_msan(node1) From 8f343bffc7f0473e0595c5928e69726d2583b8cc Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Thu, 27 Jun 2024 17:17:07 +0800 Subject: [PATCH 044/140] update doc --- docs/en/sql-reference/functions/string-functions.md | 2 -- 1 file changed, 2 deletions(-) diff --git a/docs/en/sql-reference/functions/string-functions.md b/docs/en/sql-reference/functions/string-functions.md index a258456345e..31d89060882 100644 --- a/docs/en/sql-reference/functions/string-functions.md +++ b/docs/en/sql-reference/functions/string-functions.md @@ -571,7 +571,6 @@ SELECT upper('clickhouse'); Converts a string to lowercase, assuming that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined. -Does not detect the language, e.g. for Turkish the result might not be exactly correct (i/İ vs. i/I). If the length of the UTF-8 byte sequence is different for upper and lower case of a code point, the result may be incorrect for this code point. @@ -579,7 +578,6 @@ If the length of the UTF-8 byte sequence is different for upper and lower case o Converts a string to uppercase, assuming that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined. -Does not detect the language, e.g. 
for Turkish the result might not be exactly correct (i/İ vs. i/I). If the length of the UTF-8 byte sequence is different for upper and lower case of a code point, the result may be incorrect for this code point. From 0bf26dbeac3e0cb6b521bf8ec9181127594c2161 Mon Sep 17 00:00:00 2001 From: vdimir Date: Thu, 16 May 2024 14:54:50 +0000 Subject: [PATCH 045/140] Forbid POPULATE with Replicated databases --- src/Interpreters/InterpreterCreateQuery.cpp | 6 +++--- ...33_replicated_database_forbid_create_as_select.reference | 1 + .../02933_replicated_database_forbid_create_as_select.sh | 2 ++ 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 7272e10b801..7188bd166f4 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -1305,7 +1305,7 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create) if (need_add_to_database) database = DatabaseCatalog::instance().tryGetDatabase(database_name); - if (database && database->getEngineName() == "Replicated" && create.select) + if (database && database->getEngineName() == "Replicated" && (create.select || create.is_populate)) { bool is_storage_replicated = false; if (create.storage && create.storage->engine) @@ -1315,11 +1315,11 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create) is_storage_replicated = true; } - const bool allow_create_select_for_replicated = create.isView() || create.is_create_empty || !is_storage_replicated; + const bool allow_create_select_for_replicated = (create.isView() && !create.is_populate) || create.is_create_empty || !is_storage_replicated; if (!allow_create_select_for_replicated) throw Exception( ErrorCodes::SUPPORT_IS_DISABLED, - "CREATE AS SELECT is not supported with Replicated databases. Use separate CREATE and INSERT queries"); + "CREATE AS SELECT and POPULATE is not supported with Replicated databases. 
Use separate CREATE and INSERT queries"); } if (database && database->shouldReplicateQuery(getContext(), query_ptr)) diff --git a/tests/queries/0_stateless/02933_replicated_database_forbid_create_as_select.reference b/tests/queries/0_stateless/02933_replicated_database_forbid_create_as_select.reference index d00491fd7e5..6ed281c757a 100644 --- a/tests/queries/0_stateless/02933_replicated_database_forbid_create_as_select.reference +++ b/tests/queries/0_stateless/02933_replicated_database_forbid_create_as_select.reference @@ -1 +1,2 @@ 1 +1 diff --git a/tests/queries/0_stateless/02933_replicated_database_forbid_create_as_select.sh b/tests/queries/0_stateless/02933_replicated_database_forbid_create_as_select.sh index 8a6904b6bd7..df060ee2612 100755 --- a/tests/queries/0_stateless/02933_replicated_database_forbid_create_as_select.sh +++ b/tests/queries/0_stateless/02933_replicated_database_forbid_create_as_select.sh @@ -11,6 +11,8 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) ${CLICKHOUSE_CLIENT} --query "CREATE DATABASE ${CLICKHOUSE_DATABASE}_db engine = Replicated('/clickhouse/databases/${CLICKHOUSE_TEST_ZOOKEEPER_PREFIX}/${CLICKHOUSE_DATABASE}_db', '{shard}', '{replica}')" # Non-replicated engines are allowed ${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --query "CREATE TABLE ${CLICKHOUSE_DATABASE}_db.test (id UInt64) ENGINE = MergeTree() ORDER BY id AS SELECT 1" +${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --query "CREATE MATERIALIZED VIEW ${CLICKHOUSE_DATABASE}_db.test_mv (id UInt64) ENGINE = MergeTree() ORDER BY id POPULATE AS SELECT 1" # Replicated storafes are forbidden ${CLICKHOUSE_CLIENT} --query "CREATE TABLE ${CLICKHOUSE_DATABASE}_db.test2 (id UInt64) ENGINE = ReplicatedMergeTree('/clickhouse/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/test2', '1') ORDER BY id AS SELECT 1" |& grep -cm1 "SUPPORT_IS_DISABLED" +${CLICKHOUSE_CLIENT} --query "CREATE MATERIALIZED VIEW ${CLICKHOUSE_DATABASE}_db.test_mv2 (id UInt64) ENGINE = ReplicatedMergeTree('/clickhouse/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/test_mv2', '1') ORDER BY id POPULATE AS SELECT 1" |& grep -cm1 "SUPPORT_IS_DISABLED" ${CLICKHOUSE_CLIENT} --query "DROP DATABASE ${CLICKHOUSE_DATABASE}_db" From 6dc90798c2bce90fd5a2fbf73d69575e2f4bd693 Mon Sep 17 00:00:00 2001 From: vdimir Date: Thu, 27 Jun 2024 12:43:59 +0000 Subject: [PATCH 046/140] add setting database_replicated_allow_heavy_create --- src/Core/Settings.h | 1 + src/Core/SettingsChangesHistory.h | 1 + src/Interpreters/InterpreterCreateQuery.cpp | 6 ++++-- ...933_replicated_database_forbid_create_as_select.sh | 11 +++++++++-- 4 files changed, 15 insertions(+), 4 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 41878142bdc..13751b3d1a2 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -732,6 +732,7 @@ class IColumn; M(Bool, database_replicated_always_detach_permanently, false, "Execute DETACH TABLE as DETACH TABLE PERMANENTLY if database engine is Replicated", 0) \ M(Bool, database_replicated_allow_only_replicated_engine, false, "Allow to create only Replicated tables in database with engine Replicated", 0) \ M(Bool, database_replicated_allow_replicated_engine_arguments, true, "Allow to create only Replicated tables in database with engine Replicated with explicit arguments", 0) \ + M(Bool, database_replicated_allow_heavy_create, false, "Allow long-running DDL queries (CREATE AS SELECT and POPULATE) in Replicated database engine. 
Note that it can block DDL queue for a long time.", 0) \ M(Bool, cloud_mode, false, "Only available in ClickHouse Cloud", 0) \ M(UInt64, cloud_mode_engine, 1, "Only available in ClickHouse Cloud", 0) \ M(DistributedDDLOutputMode, distributed_ddl_output_mode, DistributedDDLOutputMode::THROW, "Format of distributed DDL query result, one of: 'none', 'throw', 'null_status_on_timeout', 'never_throw', 'none_only_active', 'throw_only_active', 'null_status_on_timeout_only_active'", 0) \ diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index fba6386b9bd..ee013907353 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -87,6 +87,7 @@ namespace SettingsChangesHistory static const std::map settings_changes_history = { {"24.7", {{"output_format_parquet_write_page_index", false, true, "Add a possibility to write page index into parquet files."}, + {"database_replicated_allow_heavy_create", true, false, "Allow long-running DDL queries (CREATE AS SELECT and POPULATE) in Replicated database engine."}, }}, {"24.6", {{"materialize_skip_indexes_on_insert", true, true, "Added new setting to allow to disable materialization of skip indexes on insert"}, {"materialize_statistics_on_insert", true, true, "Added new setting to allow to disable materialization of statistics on insert"}, diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 7188bd166f4..4e4598a2574 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -1305,7 +1305,8 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create) if (need_add_to_database) database = DatabaseCatalog::instance().tryGetDatabase(database_name); - if (database && database->getEngineName() == "Replicated" && (create.select || create.is_populate)) + bool allow_heavy_create = getContext()->getSettingsRef().database_replicated_allow_heavy_create; + if (!allow_heavy_create && database && database->getEngineName() == "Replicated" && (create.select || create.is_populate)) { bool is_storage_replicated = false; if (create.storage && create.storage->engine) @@ -1319,7 +1320,8 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create) if (!allow_create_select_for_replicated) throw Exception( ErrorCodes::SUPPORT_IS_DISABLED, - "CREATE AS SELECT and POPULATE is not supported with Replicated databases. Use separate CREATE and INSERT queries"); + "CREATE AS SELECT and POPULATE is not supported with Replicated databases. Consider using separate CREATE and INSERT queries. " + "Alternatively, you can enable 'database_replicated_allow_heavy_create' setting to allow this operation, use with caution"); } if (database && database->shouldReplicateQuery(getContext(), query_ptr)) diff --git a/tests/queries/0_stateless/02933_replicated_database_forbid_create_as_select.sh b/tests/queries/0_stateless/02933_replicated_database_forbid_create_as_select.sh index df060ee2612..831963cca8d 100755 --- a/tests/queries/0_stateless/02933_replicated_database_forbid_create_as_select.sh +++ b/tests/queries/0_stateless/02933_replicated_database_forbid_create_as_select.sh @@ -9,10 +9,17 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . 
"$CURDIR"/../shell_config.sh ${CLICKHOUSE_CLIENT} --query "CREATE DATABASE ${CLICKHOUSE_DATABASE}_db engine = Replicated('/clickhouse/databases/${CLICKHOUSE_TEST_ZOOKEEPER_PREFIX}/${CLICKHOUSE_DATABASE}_db', '{shard}', '{replica}')" + # Non-replicated engines are allowed ${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --query "CREATE TABLE ${CLICKHOUSE_DATABASE}_db.test (id UInt64) ENGINE = MergeTree() ORDER BY id AS SELECT 1" ${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --query "CREATE MATERIALIZED VIEW ${CLICKHOUSE_DATABASE}_db.test_mv (id UInt64) ENGINE = MergeTree() ORDER BY id POPULATE AS SELECT 1" + # Replicated storafes are forbidden -${CLICKHOUSE_CLIENT} --query "CREATE TABLE ${CLICKHOUSE_DATABASE}_db.test2 (id UInt64) ENGINE = ReplicatedMergeTree('/clickhouse/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/test2', '1') ORDER BY id AS SELECT 1" |& grep -cm1 "SUPPORT_IS_DISABLED" -${CLICKHOUSE_CLIENT} --query "CREATE MATERIALIZED VIEW ${CLICKHOUSE_DATABASE}_db.test_mv2 (id UInt64) ENGINE = ReplicatedMergeTree('/clickhouse/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/test_mv2', '1') ORDER BY id POPULATE AS SELECT 1" |& grep -cm1 "SUPPORT_IS_DISABLED" +${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --query "CREATE TABLE ${CLICKHOUSE_DATABASE}_db.test2 (id UInt64) ENGINE = ReplicatedMergeTree('/clickhouse/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/test2', '1') ORDER BY id AS SELECT 1" |& grep -cm1 "SUPPORT_IS_DISABLED" +${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --query "CREATE MATERIALIZED VIEW ${CLICKHOUSE_DATABASE}_db.test_mv2 (id UInt64) ENGINE = ReplicatedMergeTree('/clickhouse/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/test_mv2', '1') ORDER BY id POPULATE AS SELECT 1" |& grep -cm1 "SUPPORT_IS_DISABLED" + +# But it is allowed with the special setting +${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --query "CREATE TABLE ${CLICKHOUSE_DATABASE}_db.test2 (id UInt64) ENGINE = ReplicatedMergeTree('/clickhouse/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/test2', '1') ORDER BY id AS SELECT 1" --database_replicated_allow_heavy_create=1 +${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --query "CREATE MATERIALIZED VIEW ${CLICKHOUSE_DATABASE}_db.test_mv2 (id UInt64) ENGINE = ReplicatedMergeTree('/clickhouse/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/test_mv2', '1') ORDER BY id POPULATE AS SELECT 1" --database_replicated_allow_heavy_create=1 + ${CLICKHOUSE_CLIENT} --query "DROP DATABASE ${CLICKHOUSE_DATABASE}_db" From 8d0834eadeeea4b2cd36ba8ff50bdac2d7cd3b35 Mon Sep 17 00:00:00 2001 From: vdimir Date: Thu, 27 Jun 2024 14:52:25 +0000 Subject: [PATCH 047/140] fix --- .../02933_replicated_database_forbid_create_as_select.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/queries/0_stateless/02933_replicated_database_forbid_create_as_select.sh b/tests/queries/0_stateless/02933_replicated_database_forbid_create_as_select.sh index 831963cca8d..15f169d880f 100755 --- a/tests/queries/0_stateless/02933_replicated_database_forbid_create_as_select.sh +++ b/tests/queries/0_stateless/02933_replicated_database_forbid_create_as_select.sh @@ -15,11 +15,11 @@ ${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --query "CREATE TABLE ${ ${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --query "CREATE MATERIALIZED VIEW ${CLICKHOUSE_DATABASE}_db.test_mv (id UInt64) ENGINE = MergeTree() ORDER BY id POPULATE AS SELECT 1" # Replicated storafes are forbidden -${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --query "CREATE TABLE ${CLICKHOUSE_DATABASE}_db.test2 (id UInt64) 
ENGINE = ReplicatedMergeTree('/clickhouse/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/test2', '1') ORDER BY id AS SELECT 1" |& grep -cm1 "SUPPORT_IS_DISABLED" -${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --query "CREATE MATERIALIZED VIEW ${CLICKHOUSE_DATABASE}_db.test_mv2 (id UInt64) ENGINE = ReplicatedMergeTree('/clickhouse/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/test_mv2', '1') ORDER BY id POPULATE AS SELECT 1" |& grep -cm1 "SUPPORT_IS_DISABLED" +${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --query "CREATE TABLE ${CLICKHOUSE_DATABASE}_db.test2 (id UInt64) ENGINE = ReplicatedMergeTree ORDER BY id AS SELECT 1" |& grep -cm1 "SUPPORT_IS_DISABLED" +${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --query "CREATE MATERIALIZED VIEW ${CLICKHOUSE_DATABASE}_db.test_mv2 (id UInt64) ENGINE = ReplicatedMergeTree ORDER BY id POPULATE AS SELECT 1" |& grep -cm1 "SUPPORT_IS_DISABLED" # But it is allowed with the special setting -${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --query "CREATE TABLE ${CLICKHOUSE_DATABASE}_db.test2 (id UInt64) ENGINE = ReplicatedMergeTree('/clickhouse/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/test2', '1') ORDER BY id AS SELECT 1" --database_replicated_allow_heavy_create=1 -${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --query "CREATE MATERIALIZED VIEW ${CLICKHOUSE_DATABASE}_db.test_mv2 (id UInt64) ENGINE = ReplicatedMergeTree('/clickhouse/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/test_mv2', '1') ORDER BY id POPULATE AS SELECT 1" --database_replicated_allow_heavy_create=1 +${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --query "CREATE TABLE ${CLICKHOUSE_DATABASE}_db.test2 (id UInt64) ENGINE = ReplicatedMergeTree ORDER BY id AS SELECT 1" --database_replicated_allow_heavy_create=1 +${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --query "CREATE MATERIALIZED VIEW ${CLICKHOUSE_DATABASE}_db.test_mv2 (id UInt64) ENGINE = ReplicatedMergeTree ORDER BY id POPULATE AS SELECT 1" --database_replicated_allow_heavy_create=1 ${CLICKHOUSE_CLIENT} --query "DROP DATABASE ${CLICKHOUSE_DATABASE}_db" From e0163a0a9f8fec3f8bb9a9c17879205aa1e50c2c Mon Sep 17 00:00:00 2001 From: Duc Canh Le Date: Fri, 28 Jun 2024 09:32:36 +0000 Subject: [PATCH 048/140] fix EmbeddedRocksDB with TTL writes wrong data Signed-off-by: Duc Canh Le --- .../RocksDB/EmbeddedRocksDBBulkSink.cpp | 46 ++++++++++++++++--- .../RocksDB/EmbeddedRocksDBBulkSink.h | 7 +-- .../02956_rocksdb_with_ttl.reference | 2 + .../0_stateless/02956_rocksdb_with_ttl.sql | 12 +++++ 4 files changed, 58 insertions(+), 9 deletions(-) create mode 100644 tests/queries/0_stateless/02956_rocksdb_with_ttl.reference create mode 100644 tests/queries/0_stateless/02956_rocksdb_with_ttl.sql diff --git a/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.cpp b/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.cpp index 0baa234e7a3..688e7226a8c 100644 --- a/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.cpp +++ b/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.cpp @@ -1,11 +1,8 @@ -#include #include #include #include #include #include -#include -#include #include #include #include @@ -18,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -155,7 +153,8 @@ std::vector EmbeddedRocksDBBulkSink::squash(Chunk chunk) return {}; } -std::pair EmbeddedRocksDBBulkSink::serializeChunks(std::vector && input_chunks) const +template +std::pair EmbeddedRocksDBBulkSink::serializeChunks(const std::vector & input_chunks) const { auto serialized_key_column = ColumnString::create(); auto serialized_value_column = ColumnString::create(); @@ 
-168,14 +167,48 @@ std::pair EmbeddedRocksDBBulkSink::seriali WriteBufferFromVector writer_key(serialized_key_data); WriteBufferFromVector writer_value(serialized_value_data); - for (auto && chunk : input_chunks) + /// TTL handling + [[maybe_unused]] auto encode_fixed_32 = [](char * dst, int32_t value) { + if constexpr (std::endian::native == std::endian::little) + { + memcpy(dst, &value, sizeof(value)); + } + else + { + dst[0] = value & 0xff; + dst[1] = (value >> 8) & 0xff; + dst[2] = (value >> 16) & 0xff; + dst[3] = (value >> 24) & 0xff; + } + }; + [[maybe_unused]] auto get_rocksdb_ts = [this, &encode_fixed_32](char * ts_string) + { + int64_t curtime = -1; + auto * system_clock = storage.rocksdb_ptr->GetEnv()->GetSystemClock().get(); + rocksdb::Status st = system_clock->GetCurrentTime(&curtime); + if (!st.ok()) + throw Exception(ErrorCodes::ROCKSDB_ERROR, "RocksDB error: {}", st.ToString()); + encode_fixed_32(ts_string, static_cast(curtime)); + }; + + for (const auto & chunk : input_chunks) + { + [[maybe_unused]] char ts_string[4]; + if constexpr (with_timestamp) + get_rocksdb_ts(ts_string); + const auto & columns = chunk.getColumns(); auto rows = chunk.getNumRows(); for (size_t i = 0; i < rows; ++i) { for (size_t idx = 0; idx < columns.size(); ++idx) serializations[idx]->serializeBinary(*columns[idx], i, idx == primary_key_pos ? writer_key : writer_value, {}); + + /// Append timestamp to end of value, see DBWithTTLImpl::AppendTS::AppendTS + if constexpr (with_timestamp) + writeString(ts_string, 4, writer_value); + /// String in ColumnString must be null-terminated writeChar('\0', writer_key); writeChar('\0', writer_value); @@ -198,7 +231,8 @@ void EmbeddedRocksDBBulkSink::consume(Chunk chunk_) if (chunks_to_write.empty()) return; - auto [serialized_key_column, serialized_value_column] = serializeChunks(std::move(chunks_to_write)); + auto [serialized_key_column, serialized_value_column] + = storage.ttl > 0 ? 
serializeChunks<true>(std::move(chunks_to_write)) : serializeChunks<false>(std::move(chunks_to_write));
     auto sst_file_path = getTemporarySSTFilePath();
     LOG_DEBUG(getLogger("EmbeddedRocksDBBulkSink"), "Writing {} rows to SST file {}", serialized_key_column->size(), sst_file_path);
     if (auto status = buildSSTFile(sst_file_path, *serialized_key_column, *serialized_value_column); !status.ok())
diff --git a/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.h b/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.h
index 46193b152ca..f1f19d81692 100644
--- a/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.h
+++ b/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.h
@@ -1,7 +1,5 @@
 #pragma once
 
-#include
-#include
 #include
 #include
 #include
@@ -49,7 +47,10 @@ private:
     bool isEnoughSize(const std::vector & input_chunks) const;
     bool isEnoughSize(const Chunk & chunk) const;
     /// Serialize chunks to rocksdb key-value pairs
-    std::pair serializeChunks(std::vector && input_chunks) const;
+    template
+    std::pair serializeChunks(const std::vector & input_chunks) const;
+    /// Generate SST file path
+    String getTemporarySSTFilePath(size_t seq_id);
 
     StorageEmbeddedRocksDB & storage;
     StorageMetadataPtr metadata_snapshot;
diff --git a/tests/queries/0_stateless/02956_rocksdb_with_ttl.reference b/tests/queries/0_stateless/02956_rocksdb_with_ttl.reference
new file mode 100644
index 00000000000..508f73c8d78
--- /dev/null
+++ b/tests/queries/0_stateless/02956_rocksdb_with_ttl.reference
@@ -0,0 +1,2 @@
+0 foo
+0
diff --git a/tests/queries/0_stateless/02956_rocksdb_with_ttl.sql b/tests/queries/0_stateless/02956_rocksdb_with_ttl.sql
new file mode 100644
index 00000000000..88259a38d4c
--- /dev/null
+++ b/tests/queries/0_stateless/02956_rocksdb_with_ttl.sql
@@ -0,0 +1,12 @@
+-- Tags: no-ordinary-database, use-rocksdb
+
+CREATE TABLE dict_with_ttl (key UInt64, value String) ENGINE = EmbeddedRocksDB(2) PRIMARY KEY (key);
+INSERT INTO dict_with_ttl VALUES (0, 'foo');
+-- Data inserted correctly
+SELECT * FROM dict_with_ttl;
+-- If possible, we should test that even if we execute OPTIMIZE TABLE, the data is still there if the TTL is not expired yet.
+-- Nevertheless, query time is unpredictable with different builds, so we can't test that reliably. So we only test that after 3s
+-- we execute OPTIMIZE and the data should be gone. 
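+-- With TTL = 2s, the row inserted above should already be expired by the time the 3s sleep
+-- below finishes, so after OPTIMIZE compacts it away the final SELECT is expected to return no rows.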
+SELECT sleep(3);
+OPTIMIZE TABLE dict_with_ttl;
+SELECT * FROM dict_with_ttl;

From b0bdbda27e297f86616eccd6447166fa795ef4b4 Mon Sep 17 00:00:00 2001
From: Duc Canh Le
Date: Fri, 28 Jun 2024 10:35:39 +0000
Subject: [PATCH 049/140] small fixes

Signed-off-by: Duc Canh Le

---
 src/Storages/RocksDB/EmbeddedRocksDBBulkSink.cpp | 8 ++++----
 src/Storages/RocksDB/EmbeddedRocksDBBulkSink.h | 4 +---
 2 files changed, 5 insertions(+), 7 deletions(-)

diff --git a/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.cpp b/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.cpp
index 15b759c3f4a..4b444da97a3 100644
--- a/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.cpp
+++ b/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.cpp
@@ -155,7 +155,7 @@ std::vector EmbeddedRocksDBBulkSink::squash(Chunk chunk)
 }
 
 template
-std::pair EmbeddedRocksDBBulkSink::serializeChunks(const std::vector & input_chunks) const
+std::pair EmbeddedRocksDBBulkSink::serializeChunks(std::vector && input_chunks) const
 {
     auto serialized_key_column = ColumnString::create();
     auto serialized_value_column = ColumnString::create();
@@ -167,9 +167,9 @@ std::pair EmbeddedRocksDBBulkSink::seriali
     auto & serialized_value_offsets = serialized_value_column->getOffsets();
     WriteBufferFromVector writer_key(serialized_key_data);
     WriteBufferFromVector writer_value(serialized_value_data);
+    FormatSettings format_settings; /// Format settings is 1.5KB, so it's not wise to create it for each row
 
     /// TTL handling
-    FormatSettings format_settings; /// Format settings is 1.5KB, so it's not wise to create it for each row
     [[maybe_unused]] auto encode_fixed_32 = [](char * dst, int32_t value)
     {
         if constexpr (std::endian::native == std::endian::little)
@@ -194,7 +194,7 @@ std::pair EmbeddedRocksDBBulkSink::seriali
         encode_fixed_32(ts_string, static_cast(curtime));
     };
 
-    for (const auto & chunk : input_chunks)
+    for (auto && chunk : input_chunks)
     {
         [[maybe_unused]] char ts_string[4];
         if constexpr (with_timestamp)
@@ -207,7 +207,7 @@ std::pair EmbeddedRocksDBBulkSink::seriali
             for (size_t idx = 0; idx < columns.size(); ++idx)
                 serializations[idx]->serializeBinary(*columns[idx], i, idx == primary_key_pos ? writer_key : writer_value, format_settings);
 
-            /// Append timestamp to end of value, see DBWithTTLImpl::AppendTS::AppendTS
+            /// Append timestamp to end of value, see rocksdb::DBWithTTLImpl::AppendTS
 
diff --git a/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.h b/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.h
index f1f19d81692..1f548e7813d 100644
--- a/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.h
+++ b/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.h
@@ -48,9 +48,7 @@ private:
     bool isEnoughSize(const Chunk & chunk) const;
     /// Serialize chunks to rocksdb key-value pairs
     template
-    std::pair serializeChunks(const std::vector & input_chunks) const;
-    /// Generate SST file path
-    String getTemporarySSTFilePath(size_t seq_id);
+    std::pair serializeChunks(std::vector && input_chunks) const;
 
     StorageEmbeddedRocksDB & storage;
     StorageMetadataPtr metadata_snapshot;

From 146a7e13d950cb6b122fe23d336092cbe009b7d3 Mon Sep 17 00:00:00 2001
From: Pablo Marcos
Date: Thu, 27 Jun 2024 08:44:46 +0000
Subject: [PATCH 050/140] Throw exception in bitTest when position is out of
 bounds

This happens whenever the bit position is bigger than the number of
bits in the number, or when the bit position is negative. 
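For illustration, the intended behavior, as exercised by the tests added below:

    SELECT bitTest(toUInt8(1 + 4 + 16 + 64), 2);  -- bit 2 of 0b01010101 is set, returns 1
    SELECT bitTest(toUInt8(1 + 4 + 16 + 64), 8);  -- UInt8 only has bits 0..7, throws PARAMETER_OUT_OF_BOUND
    SELECT bitTest(toUInt16(1), -1);              -- negative positions also throw PARAMETER_OUT_OF_BOUND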
---
 src/Functions/bitTest.cpp | 14 +++++++++--
 .../00967_ubsan_bit_test.reference | 1 -
 .../0_stateless/00967_ubsan_bit_test.sql | 1 -
 .../01082_bit_test_out_of_bound.reference | 24 +++++++++++++++++++
 .../01082_bit_test_out_of_bound.sql | 5 ++++
 .../01710_minmax_count_projection.sql | 2 +-
 6 files changed, 42 insertions(+), 5 deletions(-)
 delete mode 100644 tests/queries/0_stateless/00967_ubsan_bit_test.reference
 delete mode 100644 tests/queries/0_stateless/00967_ubsan_bit_test.sql

diff --git a/src/Functions/bitTest.cpp b/src/Functions/bitTest.cpp
index 78ec9c8b773..f4c90a0c603 100644
--- a/src/Functions/bitTest.cpp
+++ b/src/Functions/bitTest.cpp
@@ -8,6 +8,7 @@ namespace DB
 namespace ErrorCodes
 {
     extern const int NOT_IMPLEMENTED;
+    extern const int PARAMETER_OUT_OF_BOUND;
 }
 
 namespace
@@ -21,12 +22,21 @@ struct BitTestImpl
     static const constexpr bool allow_string_integer = false;
 
     template
-    NO_SANITIZE_UNDEFINED static Result apply(A a [[maybe_unused]], B b [[maybe_unused]])
+    static Result apply(A a [[maybe_unused]], B b [[maybe_unused]])
     {
         if constexpr (is_big_int_v || is_big_int_v)
             throw Exception(ErrorCodes::NOT_IMPLEMENTED, "bitTest is not implemented for big integers as second argument");
         else
-            return (typename NumberTraits::ToInteger::Type(a) >> typename NumberTraits::ToInteger::Type(b)) & 1;
+        {
+            const auto max_position = decltype(b)((8 * sizeof(a)) - 1);
+            if (b > max_position || b < 0)
+            {
+                throw Exception(ErrorCodes::PARAMETER_OUT_OF_BOUND,
+                    "The bit position argument needs to be a positive value and less than or equal to {} for integer {}",
+                    static_cast(max_position), static_cast(a));
+            }
+            return (a >> b) & 1;
+        }
     }
 
 #if USE_EMBEDDED_COMPILER
diff --git a/tests/queries/0_stateless/00967_ubsan_bit_test.reference b/tests/queries/0_stateless/00967_ubsan_bit_test.reference
deleted file mode 100644
index 573541ac970..00000000000
--- a/tests/queries/0_stateless/00967_ubsan_bit_test.reference
+++ /dev/null
@@ -1 +0,0 @@
-0
diff --git a/tests/queries/0_stateless/00967_ubsan_bit_test.sql b/tests/queries/0_stateless/00967_ubsan_bit_test.sql
deleted file mode 100644
index 1682e725670..00000000000
--- a/tests/queries/0_stateless/00967_ubsan_bit_test.sql
+++ /dev/null
@@ -1 +0,0 @@
-SELECT sum(ignore(bitTest(number, 65))) FROM numbers(10);
diff --git a/tests/queries/0_stateless/01082_bit_test_out_of_bound.reference b/tests/queries/0_stateless/01082_bit_test_out_of_bound.reference
index 708c5d9d994..ee35e683ed1 100644
--- a/tests/queries/0_stateless/01082_bit_test_out_of_bound.reference
+++ b/tests/queries/0_stateless/01082_bit_test_out_of_bound.reference
@@ -198,3 +198,27 @@
 97 1
 98 1
 99 1
+0 1
+1 0
+2 1
+3 0
+4 1
+5 0
+6 1
+7 0
+0 1
+1 0
+2 1
+3 0
+4 1
+5 0
+6 1
+7 0
+8 1
+9 0
+10 1
+11 0
+12 1
+13 0
+14 1
+15 0
diff --git a/tests/queries/0_stateless/01082_bit_test_out_of_bound.sql b/tests/queries/0_stateless/01082_bit_test_out_of_bound.sql
index 82e2c5a2380..324768b2e1d 100644
--- a/tests/queries/0_stateless/01082_bit_test_out_of_bound.sql
+++ b/tests/queries/0_stateless/01082_bit_test_out_of_bound.sql
@@ -1,2 +1,7 @@
 SELECT number, bitTestAny(toUInt8(1 + 4 + 16 + 64), number) FROM numbers(100);
 SELECT number, bitTestAll(toUInt8(1 + 4 + 16 + 64), number) FROM numbers(100);
+
+SELECT number, bitTest(toUInt8(1 + 4 + 16 + 64), number) FROM numbers(8);
+SELECT number, bitTest(toUInt8(1 + 4 + 16 + 64), number) FROM numbers(8, 16); -- { serverError PARAMETER_OUT_OF_BOUND }
+SELECT number, bitTest(toUInt16(1 + 4 + 16 + 64 + 256 + 1024 + 4096 + 16384 + 65536), number) FROM numbers(16);
+SELECT 
-number, bitTest(toUInt16(1), -number) FROM numbers(8); -- { serverError PARAMETER_OUT_OF_BOUND } diff --git a/tests/queries/0_stateless/01710_minmax_count_projection.sql b/tests/queries/0_stateless/01710_minmax_count_projection.sql index d0177da84d2..6c598bce440 100644 --- a/tests/queries/0_stateless/01710_minmax_count_projection.sql +++ b/tests/queries/0_stateless/01710_minmax_count_projection.sql @@ -16,7 +16,7 @@ select min(i), max(i), count() from d where _partition_value.1 = 10 group by _pa select min(i) from d where 1 = _partition_value.1; -- fuzz crash https://github.com/ClickHouse/ClickHouse/issues/37151 -SELECT min(i), max(i), count() FROM d WHERE (_partition_value.1) = 0 GROUP BY ignore(bitTest(ignore(NULL), 65535), NULL, (_partition_value.1) = 7, '10.25', bitTest(NULL, -9223372036854775808), NULL, ignore(ignore(-2147483647, NULL)), 1024), _partition_id ORDER BY _partition_id ASC NULLS FIRST; +SELECT min(i), max(i), count() FROM d WHERE (_partition_value.1) = 0 GROUP BY ignore(bitTest(ignore(NULL), 0), NULL, (_partition_value.1) = 7, '10.25', bitTest(NULL, 0), NULL, ignore(ignore(-2147483647, NULL)), 1024), _partition_id ORDER BY _partition_id ASC NULLS FIRST; drop table d; From d63f70e4f1459bbb7808fd1fb7202e9648ad6570 Mon Sep 17 00:00:00 2001 From: Pablo Marcos Date: Thu, 27 Jun 2024 10:41:51 +0000 Subject: [PATCH 051/140] Add bound check to bitTestAll and bitTestAny --- src/Functions/FunctionBitTestMany.h | 13 +- src/Functions/bitTest.cpp | 8 +- .../01082_bit_test_out_of_bound.reference | 184 ------------------ .../01082_bit_test_out_of_bound.sql | 6 +- 4 files changed, 20 insertions(+), 191 deletions(-) diff --git a/src/Functions/FunctionBitTestMany.h b/src/Functions/FunctionBitTestMany.h index 71e94b1e71d..19ece2ae9e5 100644 --- a/src/Functions/FunctionBitTestMany.h +++ b/src/Functions/FunctionBitTestMany.h @@ -17,6 +17,7 @@ namespace ErrorCodes extern const int ILLEGAL_COLUMN; extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int TOO_FEW_ARGUMENTS_FOR_FUNCTION; + extern const int PARAMETER_OUT_OF_BOUND; } @@ -146,6 +147,9 @@ private: const auto pos = pos_col_const->getUInt(0); if (pos < 8 * sizeof(ValueType)) mask = mask | (ValueType(1) << pos); + else + throw Exception(ErrorCodes::PARAMETER_OUT_OF_BOUND, + "The bit position argument {} is out of bounds for number", static_cast(pos)); } else { @@ -186,13 +190,20 @@ private: for (const auto i : collections::range(0, mask.size())) if (pos[i] < 8 * sizeof(ValueType)) mask[i] = mask[i] | (ValueType(1) << pos[i]); + else + throw Exception(ErrorCodes::PARAMETER_OUT_OF_BOUND, + "The bit position argument {} is out of bounds for number", static_cast(pos[i])); return true; } else if (const auto pos_col_const = checkAndGetColumnConst>(pos_col_untyped)) { const auto & pos = pos_col_const->template getValue(); - const auto new_mask = pos < 8 * sizeof(ValueType) ? 
ValueType(1) << pos : 0; + if (pos >= 8 * sizeof(ValueType)) + throw Exception(ErrorCodes::PARAMETER_OUT_OF_BOUND, + "The bit position argument {} is out of bounds for number", static_cast(pos)); + + const auto new_mask = ValueType(1) << pos; for (const auto i : collections::range(0, mask.size())) mask[i] = mask[i] | new_mask; diff --git a/src/Functions/bitTest.cpp b/src/Functions/bitTest.cpp index f4c90a0c603..1223ef7cbbb 100644 --- a/src/Functions/bitTest.cpp +++ b/src/Functions/bitTest.cpp @@ -28,14 +28,14 @@ struct BitTestImpl throw Exception(ErrorCodes::NOT_IMPLEMENTED, "bitTest is not implemented for big integers as second argument"); else { + typename NumberTraits::ToInteger::Type a_int(a); + typename NumberTraits::ToInteger::Type b_int(b); const auto max_position = decltype(b)((8 * sizeof(a)) - 1); if (b > max_position || b < 0) - { throw Exception(ErrorCodes::PARAMETER_OUT_OF_BOUND, "The bit position argument needs to a positive value and less or equal to {} for integer {}", - static_cast(max_position), static_cast(a)); - } - return (a >> b) & 1; + std::to_string(max_position), std::to_string(a_int)); + return (a_int >> b_int) & 1; } } diff --git a/tests/queries/0_stateless/01082_bit_test_out_of_bound.reference b/tests/queries/0_stateless/01082_bit_test_out_of_bound.reference index ee35e683ed1..cf12c6b0b1c 100644 --- a/tests/queries/0_stateless/01082_bit_test_out_of_bound.reference +++ b/tests/queries/0_stateless/01082_bit_test_out_of_bound.reference @@ -6,98 +6,6 @@ 5 0 6 1 7 0 -8 0 -9 0 -10 0 -11 0 -12 0 -13 0 -14 0 -15 0 -16 0 -17 0 -18 0 -19 0 -20 0 -21 0 -22 0 -23 0 -24 0 -25 0 -26 0 -27 0 -28 0 -29 0 -30 0 -31 0 -32 0 -33 0 -34 0 -35 0 -36 0 -37 0 -38 0 -39 0 -40 0 -41 0 -42 0 -43 0 -44 0 -45 0 -46 0 -47 0 -48 0 -49 0 -50 0 -51 0 -52 0 -53 0 -54 0 -55 0 -56 0 -57 0 -58 0 -59 0 -60 0 -61 0 -62 0 -63 0 -64 0 -65 0 -66 0 -67 0 -68 0 -69 0 -70 0 -71 0 -72 0 -73 0 -74 0 -75 0 -76 0 -77 0 -78 0 -79 0 -80 0 -81 0 -82 0 -83 0 -84 0 -85 0 -86 0 -87 0 -88 0 -89 0 -90 0 -91 0 -92 0 -93 0 -94 0 -95 0 -96 0 -97 0 -98 0 -99 0 0 1 1 0 2 1 @@ -106,98 +14,6 @@ 5 0 6 1 7 0 -8 1 -9 1 -10 1 -11 1 -12 1 -13 1 -14 1 -15 1 -16 1 -17 1 -18 1 -19 1 -20 1 -21 1 -22 1 -23 1 -24 1 -25 1 -26 1 -27 1 -28 1 -29 1 -30 1 -31 1 -32 1 -33 1 -34 1 -35 1 -36 1 -37 1 -38 1 -39 1 -40 1 -41 1 -42 1 -43 1 -44 1 -45 1 -46 1 -47 1 -48 1 -49 1 -50 1 -51 1 -52 1 -53 1 -54 1 -55 1 -56 1 -57 1 -58 1 -59 1 -60 1 -61 1 -62 1 -63 1 -64 1 -65 1 -66 1 -67 1 -68 1 -69 1 -70 1 -71 1 -72 1 -73 1 -74 1 -75 1 -76 1 -77 1 -78 1 -79 1 -80 1 -81 1 -82 1 -83 1 -84 1 -85 1 -86 1 -87 1 -88 1 -89 1 -90 1 -91 1 -92 1 -93 1 -94 1 -95 1 -96 1 -97 1 -98 1 -99 1 0 1 1 0 2 1 diff --git a/tests/queries/0_stateless/01082_bit_test_out_of_bound.sql b/tests/queries/0_stateless/01082_bit_test_out_of_bound.sql index 324768b2e1d..92ece2a4aa4 100644 --- a/tests/queries/0_stateless/01082_bit_test_out_of_bound.sql +++ b/tests/queries/0_stateless/01082_bit_test_out_of_bound.sql @@ -1,5 +1,7 @@ -SELECT number, bitTestAny(toUInt8(1 + 4 + 16 + 64), number) FROM numbers(100); -SELECT number, bitTestAll(toUInt8(1 + 4 + 16 + 64), number) FROM numbers(100); +SELECT number, bitTestAny(toUInt8(1 + 4 + 16 + 64), number) FROM numbers(8); +SELECT number, bitTestAny(toUInt8(1 + 4 + 16 + 64), number) FROM numbers(8, 16); -- { serverError PARAMETER_OUT_OF_BOUND } +SELECT number, bitTestAll(toUInt8(1 + 4 + 16 + 64), number) FROM numbers(8); +SELECT number, bitTestAll(toUInt8(1 + 4 + 16 + 64), number) FROM numbers(8, 16); -- { serverError PARAMETER_OUT_OF_BOUND } SELECT 
number, bitTest(toUInt8(1 + 4 + 16 + 64), number) FROM numbers(8);
 SELECT number, bitTest(toUInt8(1 + 4 + 16 + 64), number) FROM numbers(8, 16); -- { serverError PARAMETER_OUT_OF_BOUND }

From 7340d9adf8b0d70479717143e183b3f19222085b Mon Sep 17 00:00:00 2001
From: Blargian
Date: Fri, 28 Jun 2024 13:29:26 +0200
Subject: [PATCH 052/140] add transactionID

---
 .../functions/other-functions.md | 42 +++++++++++++++++++
 1 file changed, 42 insertions(+)

diff --git a/docs/en/sql-reference/functions/other-functions.md b/docs/en/sql-reference/functions/other-functions.md
index 58fc1eba02e..738a12143a3 100644
--- a/docs/en/sql-reference/functions/other-functions.md
+++ b/docs/en/sql-reference/functions/other-functions.md
@@ -3860,3 +3860,45 @@ Result:
 └───────────────┘
 ```
 
+## transactionID
+
+Returns the ID of a [transaction](https://clickhouse.com/docs/en/guides/developer/transactional#transactions-commit-and-rollback).
+
+:::note
+This function is part of an experimental feature set. Enable experimental transaction support by adding this setting in `config.d/transactions.xml`:
+
+```
+<clickhouse>
+    <allow_experimental_transactions>1</allow_experimental_transactions>
+</clickhouse>
+```
+
+For more information see the page [Transactional (ACID) support](https://clickhouse.com/docs/en/guides/developer/transactional#transactions-commit-and-rollback).
+:::
+
+**Syntax**
+
+```sql
+transactionID()
+```
+
+**Returned value**
+
+- The transaction ID. [Tuple](../data-types/tuple.md)([UInt64](../data-types/int-uint.md), [UInt64](../data-types/int-uint.md), [UUID](../data-types/uuid.md)).
+
+**Example**
+
+Query:
+
+```sql
+SELECT transactionID();
+```
+
+Result:
+
+```response
+┌─transactionID()──────────────────────────────┐
+│ (0,0,'00000000-0000-0000-0000-000000000000') │
+└──────────────────────────────────────────────┘
+```
+

From ce330d54cf9e75defa98fa8fa6e16b17252441ad Mon Sep 17 00:00:00 2001
From: kssenii
Date: Fri, 28 Jun 2024 13:38:32 +0200
Subject: [PATCH 053/140] Review fixes (partial)

---
 .../DataLakes/DeltaLakeMetadata.cpp | 41 +++++++++++--------
 .../DataLakes/IStorageDataLake.h | 19 ++++++---
 .../StorageObjectStorageSource.cpp | 1 -
 3 files changed, 39 insertions(+), 22 deletions(-)

diff --git a/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp b/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp
index bd3e21f12fd..79cd48e7aab 100644
--- a/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp
+++ b/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp
@@ -168,14 +168,20 @@ struct DeltaLakeMetadata::Impl
      * \"nullCount\":{\"col-6c990940-59bb-4709-8f2e-17083a82c01a\":0,\"col-763cd7e2-7627-4d8e-9fb7-9e85d0c8845b\":0}}"}}
      * "
      */
+
+    /// Read metadata file and fill `file_schema`, `file_partition_columns`, `result`.
+    /// `result` is a list of data files.
+    /// `file_schema` is a common schema for all files.
+    /// Schema evolution is not supported, so we check that all files have the same schema.
+    /// `file_partiion_columns` is information about parition columns of data files. 
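+    /// As a hypothetical illustration: for a metadata entry like
+    ///     "add": {"path": "part-0001.parquet", "partitionValues": {"year": "2024"}}
+    /// `file_partition_columns` would map "part-0001.parquet" to the pair of
+    /// the "year" name-and-type taken from `file_schema` and Field("2024"),
+    /// and a "year" missing from `file_schema` is reported as an error below.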
void processMetadataFile( - const String & key, + const String & metadata_file_path, NamesAndTypesList & file_schema, DataLakePartitionColumns & file_partition_columns, std::set & result) { auto read_settings = context->getReadSettings(); - auto buf = object_storage->readObject(StoredObject(key), read_settings); + auto buf = object_storage->readObject(StoredObject(metadata_file_path), read_settings); char c; while (!buf->eof()) @@ -197,9 +203,9 @@ struct DeltaLakeMetadata::Impl Poco::Dynamic::Var json = parser.parse(json_str); Poco::JSON::Object::Ptr object = json.extract(); - std::ostringstream oss; // STYLE_CHECK_ALLOW_STD_STRING_STREAM - object->stringify(oss); - LOG_TEST(log, "Metadata: {}", oss.str()); + // std::ostringstream oss; // STYLE_CHECK_ALLOW_STD_STRING_STREAM + // object->stringify(oss); + // LOG_TEST(log, "Metadata: {}", oss.str()); if (object->has("add")) { @@ -211,21 +217,24 @@ struct DeltaLakeMetadata::Impl auto it = file_partition_columns.find(filename); if (it == file_partition_columns.end()) { - auto partition_values = add_object->get("partitionValues").extract(); - if (partition_values->size()) + if (add_object->has("partitionValues")) { - auto & current_partition_columns = file_partition_columns[filename]; - for (const auto & partition_name : partition_values->getNames()) + auto partition_values = add_object->get("partitionValues").extract(); + if (partition_values->size()) { - const auto value = partition_values->getValue(partition_name); - auto name_and_type = file_schema.tryGetByName(partition_name); - if (!name_and_type) - throw Exception(ErrorCodes::LOGICAL_ERROR, "No such column in schema: {}", partition_name); + auto & current_partition_columns = file_partition_columns[filename]; + for (const auto & partition_name : partition_values->getNames()) + { + const auto value = partition_values->getValue(partition_name); + auto name_and_type = file_schema.tryGetByName(partition_name); + if (!name_and_type) + throw Exception(ErrorCodes::LOGICAL_ERROR, "No such column in schema: {}", partition_name); - auto field = getFieldValue(value, name_and_type->type); - current_partition_columns.emplace_back(*name_and_type, field); + auto field = getFieldValue(value, name_and_type->type); + current_partition_columns.emplace_back(*name_and_type, field); - LOG_TEST(log, "Partition {} value is {} (for {})", partition_name, value, filename); + LOG_TEST(log, "Partition {} value is {} (for {})", partition_name, value, filename); + } } } } diff --git a/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h b/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h index 97fb9890490..ab069364021 100644 --- a/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h +++ b/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h @@ -99,14 +99,16 @@ public: Storage::updateConfiguration(local_context); auto new_metadata = DataLakeMetadata::create(Storage::object_storage, base_configuration, local_context); - auto partition_columns = new_metadata->getPartitionColumns(); - - if (partition_columns != Storage::partition_columns) - Storage::partition_columns = partition_columns; - if (current_metadata && *current_metadata == *new_metadata) return; + if (!current_metadata) + { + auto partition_columns = new_metadata->getPartitionColumns(); + if (partition_columns != Storage::partition_columns) + Storage::partition_columns = partition_columns; + } + current_metadata = std::move(new_metadata); auto updated_configuration = base_configuration->clone(); 
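        /// Point the cloned configuration at the data files listed in the new metadata snapshot.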
updated_configuration->setPaths(current_metadata->getDataFiles()); @@ -127,6 +129,13 @@ public: { base_configuration->format = Storage::configuration->format; } + + if (current_metadata) + { + auto partition_columns = current_metadata->getPartitionColumns(); + if (partition_columns != Storage::partition_columns) + Storage::partition_columns = partition_columns; + } } private: diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp index 6a5957c405b..b99ac466081 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp @@ -229,7 +229,6 @@ Chunk StorageObjectStorageSource::generate() } } - return chunk; } From f25147aefd7cef1ed18696df57a60369120229f8 Mon Sep 17 00:00:00 2001 From: kssenii Date: Fri, 28 Jun 2024 14:57:38 +0200 Subject: [PATCH 054/140] Review fixes --- .../ObjectStorage/DataLakes/IStorageDataLake.h | 14 ++++---------- .../ObjectStorage/StorageObjectStorage.cpp | 11 ++++------- src/Storages/ObjectStorage/StorageObjectStorage.h | 5 ++++- .../ObjectStorage/StorageObjectStorageSource.cpp | 5 ++--- .../ObjectStorage/StorageObjectStorageSource.h | 4 +--- 5 files changed, 15 insertions(+), 24 deletions(-) diff --git a/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h b/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h index ab069364021..f1217bc9729 100644 --- a/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h +++ b/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h @@ -102,16 +102,10 @@ public: if (current_metadata && *current_metadata == *new_metadata) return; - if (!current_metadata) - { - auto partition_columns = new_metadata->getPartitionColumns(); - if (partition_columns != Storage::partition_columns) - Storage::partition_columns = partition_columns; - } - current_metadata = std::move(new_metadata); auto updated_configuration = base_configuration->clone(); updated_configuration->setPaths(current_metadata->getDataFiles()); + updated_configuration->setPartitionColumns(current_metadata->getPartitionColumns()); Storage::configuration = updated_configuration; } @@ -132,9 +126,9 @@ public: if (current_metadata) { - auto partition_columns = current_metadata->getPartitionColumns(); - if (partition_columns != Storage::partition_columns) - Storage::partition_columns = partition_columns; + const auto & columns = current_metadata->getPartitionColumns(); + base_configuration->setPartitionColumns(columns); + Storage::configuration->setPartitionColumns(columns); } } diff --git a/src/Storages/ObjectStorage/StorageObjectStorage.cpp b/src/Storages/ObjectStorage/StorageObjectStorage.cpp index 4e3eeeb17f2..683473006e3 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorage.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorage.cpp @@ -111,8 +111,7 @@ public: const bool need_only_count_, ContextPtr context_, size_t max_block_size_, - size_t num_streams_, - const DataLakePartitionColumns & partition_columns_) + size_t num_streams_) : SourceStepWithFilter(DataStream{.header = info_.source_header}, columns_to_read, query_info_, storage_snapshot_, context_) , object_storage(object_storage_) , configuration(configuration_) @@ -124,7 +123,6 @@ public: , max_block_size(max_block_size_) , num_streams(num_streams_) , distributed_processing(distributed_processing_) - , partition_columns(partition_columns_) { } @@ -163,7 +161,7 @@ public: { auto source = std::make_shared( getName(), object_storage, configuration, info, 
format_settings, - context, max_block_size, iterator_wrapper, max_parsing_threads, need_only_count, partition_columns); + context, max_block_size, iterator_wrapper, max_parsing_threads, need_only_count); source->setKeyCondition(filter_actions_dag, context); pipes.emplace_back(std::move(source)); @@ -192,7 +190,6 @@ private: const size_t max_block_size; size_t num_streams; const bool distributed_processing; - DataLakePartitionColumns partition_columns; void createIterator(const ActionsDAG::Node * predicate) { @@ -252,8 +249,7 @@ void StorageObjectStorage::read( need_only_count, local_context, max_block_size, - num_streams, - partition_columns); + num_streams); query_plan.addStep(std::move(read_step)); } @@ -464,6 +460,7 @@ StorageObjectStorage::Configuration::Configuration(const Configuration & other) format = other.format; compression_method = other.compression_method; structure = other.structure; + partition_columns = other.partition_columns; } bool StorageObjectStorage::Configuration::withPartitionWildcard() const diff --git a/src/Storages/ObjectStorage/StorageObjectStorage.h b/src/Storages/ObjectStorage/StorageObjectStorage.h index fba91edf6f7..c93a0bf6943 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorage.h +++ b/src/Storages/ObjectStorage/StorageObjectStorage.h @@ -136,7 +136,6 @@ protected: const std::optional format_settings; const ASTPtr partition_by; const bool distributed_processing; - mutable DataLakePartitionColumns partition_columns; LoggerPtr log; }; @@ -196,6 +195,9 @@ public: virtual ConfigurationPtr clone() = 0; virtual bool isStaticConfiguration() const { return true; } + void setPartitionColumns(const DataLakePartitionColumns & columns) { partition_columns = columns; } + const DataLakePartitionColumns & getPartitionColumns() const { return partition_columns; } + String format = "auto"; String compression_method = "auto"; String structure = "auto"; @@ -207,6 +209,7 @@ protected: void assertInitialized() const; bool initialized = false; + DataLakePartitionColumns partition_columns; }; } diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp index b99ac466081..6bded90f11d 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp @@ -49,8 +49,7 @@ StorageObjectStorageSource::StorageObjectStorageSource( UInt64 max_block_size_, std::shared_ptr file_iterator_, size_t max_parsing_threads_, - bool need_only_count_, - const DataLakePartitionColumns & partition_columns_) + bool need_only_count_) : SourceWithKeyCondition(info.source_header, false) , WithContext(context_) , name(std::move(name_)) @@ -69,7 +68,6 @@ StorageObjectStorageSource::StorageObjectStorageSource( , columns_desc(info.columns_description) , file_iterator(file_iterator_) , schema_cache(StorageObjectStorage::getSchemaCache(context_, configuration->getTypeName())) - , partition_columns(partition_columns_) , create_reader_scheduler(threadPoolCallbackRunnerUnsafe(*create_reader_pool, "Reader")) { } @@ -206,6 +204,7 @@ Chunk StorageObjectStorageSource::generate() .last_modified = object_info->metadata->last_modified }); + const auto & partition_columns = configuration->getPartitionColumns(); if (!partition_columns.empty() && chunk_size && chunk.hasColumns()) { auto partition_values = partition_columns.find(filename); diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.h b/src/Storages/ObjectStorage/StorageObjectStorageSource.h index 
566cc687400..c16619b34d8 100644
--- a/src/Storages/ObjectStorage/StorageObjectStorageSource.h
+++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.h
@@ -40,8 +40,7 @@ public:
         UInt64 max_block_size_,
         std::shared_ptr file_iterator_,
         size_t max_parsing_threads_,
-        bool need_only_count_,
-        const DataLakePartitionColumns & partition_columns_ = {});
+        bool need_only_count_);
 
     ~StorageObjectStorageSource() override;
 
@@ -83,7 +82,6 @@ protected:
     bool initialized = false;
     size_t total_rows_in_file = 0;
     LoggerPtr log = getLogger("StorageObjectStorageSource");
-    DataLakePartitionColumns partition_columns;
 
     struct ReaderHolder : private boost::noncopyable
     {

From fbdbbf5a782a5fb032dacf880ade1ebc505e8dea Mon Sep 17 00:00:00 2001
From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com>
Date: Fri, 28 Jun 2024 15:17:35 +0200
Subject: [PATCH 055/140] Fix typo

---
 src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp b/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp
index 79cd48e7aab..3b6cbca5d46 100644
--- a/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp
+++ b/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp
@@ -173,7 +173,7 @@ struct DeltaLakeMetadata::Impl
     /// `result` is a list of data files.
     /// `file_schema` is a common schema for all files.
     /// Schema evolution is not supported, so we check that all files have the same schema.
-    /// `file_partiion_columns` is information about parition columns of data files.
+    /// `file_partition_columns` is information about partition columns of data files.
     void processMetadataFile(
         const String & metadata_file_path,
         NamesAndTypesList & file_schema,

From 9864508b052ba58f25431d4fbaab9a27ee658919 Mon Sep 17 00:00:00 2001
From: kssenii
Date: Fri, 28 Jun 2024 17:51:08 +0200
Subject: [PATCH 056/140] Fix

---
 src/Interpreters/ObjectStorageQueueLog.cpp | 3 ---
 .../ObjectStorageQueue/ObjectStorageQueueIFileMetadata.h | 1 -
 src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.cpp | 5 -----
 src/Storages/System/StorageSystemS3Queue.cpp | 3 ---
 4 files changed, 12 deletions(-)

diff --git a/src/Interpreters/ObjectStorageQueueLog.cpp b/src/Interpreters/ObjectStorageQueueLog.cpp
index 24261429434..c841984fd08 100644
--- a/src/Interpreters/ObjectStorageQueueLog.cpp
+++ b/src/Interpreters/ObjectStorageQueueLog.cpp
@@ -36,7 +36,6 @@ ColumnsDescription ObjectStorageQueueLogElement::getColumnsDescription()
         {"status", status_datatype, "Status of the processing file"},
         {"processing_start_time", std::make_shared(std::make_shared()), "Time of the start of processing the file"},
         {"processing_end_time", std::make_shared(std::make_shared()), "Time of the end of processing the file"},
-        {"ProfileEvents", std::make_shared(std::make_shared(), std::make_shared()), "Profile events collected while loading this file"},
         {"exception", std::make_shared(), "Exception message if happened"},
     };
 }
@@ -64,8 +63,6 @@ void ObjectStorageQueueLogElement::appendToBlock(MutableColumns & columns) const
     else
         columns[i++]->insertDefault();
 
-    ProfileEvents::dumpToMapColumn(counters_snapshot, columns[i++].get(), true);
-
     columns[i++]->insert(exception);
 }

diff --git a/src/Storages/ObjectStorageQueue/ObjectStorageQueueIFileMetadata.h b/src/Storages/ObjectStorageQueue/ObjectStorageQueueIFileMetadata.h
index 652b4742389..f0e55c202a2 100644
--- a/src/Storages/ObjectStorageQueue/ObjectStorageQueueIFileMetadata.h
+++ 
b/src/Storages/ObjectStorageQueue/ObjectStorageQueueIFileMetadata.h @@ -34,7 +34,6 @@ public: std::atomic processing_start_time = 0; std::atomic processing_end_time = 0; std::atomic retries = 0; - ProfileEvents::Counters profile_counters; private: mutable std::mutex last_exception_mutex; diff --git a/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.cpp b/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.cpp index 2effbe7e7c2..5087755f9e7 100644 --- a/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.cpp +++ b/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.cpp @@ -513,10 +513,6 @@ Chunk ObjectStorageQueueSource::generateImpl() path, processed_rows_from_file); } - auto * prev_scope = CurrentThread::get().attachProfileCountersScope(&file_status->profile_counters); - SCOPE_EXIT({ CurrentThread::get().attachProfileCountersScope(prev_scope); }); - /// FIXME: if files are compressed, profile counters update does not work fully (object storage related counters are not saved). Why? - try { auto timer = DB::CurrentThread::getProfileEvents().timer(ProfileEvents::ObjectStorageQueuePullMicroseconds); @@ -715,7 +711,6 @@ void ObjectStorageQueueSource::appendLogElement( .file_name = filename, .rows_processed = processed_rows, .status = processed ? ObjectStorageQueueLogElement::ObjectStorageQueueStatus::Processed : ObjectStorageQueueLogElement::ObjectStorageQueueStatus::Failed, - .counters_snapshot = file_status_.profile_counters.getPartiallyAtomicSnapshot(), .processing_start_time = file_status_.processing_start_time, .processing_end_time = file_status_.processing_end_time, .exception = file_status_.getException(), diff --git a/src/Storages/System/StorageSystemS3Queue.cpp b/src/Storages/System/StorageSystemS3Queue.cpp index c1d686067fd..a1c9380f616 100644 --- a/src/Storages/System/StorageSystemS3Queue.cpp +++ b/src/Storages/System/StorageSystemS3Queue.cpp @@ -32,7 +32,6 @@ ColumnsDescription StorageSystemS3Queue::getColumnsDescription() {"status", std::make_shared(), "Status of processing: Processed, Processing, Failed"}, {"processing_start_time", std::make_shared(std::make_shared()), "Time at which processing of the file started"}, {"processing_end_time", std::make_shared(std::make_shared()), "Time at which processing of the file ended"}, - {"ProfileEvents", std::make_shared(std::make_shared(), std::make_shared()), "Profile events collected during processing of the file"}, {"exception", std::make_shared(), "Exception which happened during processing"}, }; } @@ -65,8 +64,6 @@ void StorageSystemS3Queue::fillData(MutableColumns & res_columns, ContextPtr, co else res_columns[i++]->insertDefault(); - ProfileEvents::dumpToMapColumn(file_status->profile_counters.getPartiallyAtomicSnapshot(), res_columns[i++].get(), true); - res_columns[i++]->insert(file_status->getException()); } } From c63263fe394290e88ff34035f9213a3aa80290ef Mon Sep 17 00:00:00 2001 From: kssenii Date: Fri, 28 Jun 2024 17:56:51 +0200 Subject: [PATCH 057/140] One more --- src/Interpreters/ObjectStorageQueueLog.h | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Interpreters/ObjectStorageQueueLog.h b/src/Interpreters/ObjectStorageQueueLog.h index b0e843a0cc3..669238d8dbb 100644 --- a/src/Interpreters/ObjectStorageQueueLog.h +++ b/src/Interpreters/ObjectStorageQueueLog.h @@ -26,7 +26,6 @@ struct ObjectStorageQueueLogElement Failed, }; ObjectStorageQueueStatus status; - ProfileEvents::Counters::Snapshot counters_snapshot; time_t processing_start_time; time_t processing_end_time; std::string exception; From 
95932db17056a1c7b1ebc65fa269affd8004877d Mon Sep 17 00:00:00 2001
From: Smita Kulkarni
Date: Fri, 28 Jun 2024 22:01:12 +0200
Subject: [PATCH 058/140] Add missing workload identity changes

---
 .../ObjectStorage/Azure/Configuration.cpp | 38 ++++++++++++++++---
 1 file changed, 33 insertions(+), 5 deletions(-)

diff --git a/src/Storages/ObjectStorage/Azure/Configuration.cpp b/src/Storages/ObjectStorage/Azure/Configuration.cpp
index 163f08be420..f9a2a59e4a0 100644
--- a/src/Storages/ObjectStorage/Azure/Configuration.cpp
+++ b/src/Storages/ObjectStorage/Azure/Configuration.cpp
@@ -12,6 +12,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 
@@ -179,6 +180,7 @@ AzureClientPtr StorageAzureConfiguration::createClient(bool is_read_only, bool a
     }
 
     std::unique_ptr blob_service_client;
+    size_t pos = configuration.connection_url.find('?');
     std::shared_ptr managed_identity_credential;
     if (storage_shared_key_credential)
     {
@@ -186,12 +188,20 @@ AzureClientPtr StorageAzureConfiguration::createClient(bool is_read_only, bool a
     }
     else
     {
-        managed_identity_credential = std::make_shared();
-        blob_service_client = std::make_unique(connection_url, managed_identity_credential);
+        /// If connection_url does not have '?', then it's not SAS
+        if (pos == std::string::npos)
+        {
+            auto workload_identity_credential = std::make_shared();
+            blob_service_client = std::make_unique(configuration.connection_url, workload_identity_credential);
+        }
+        else
+        {
+            managed_identity_credential = std::make_shared();
+            blob_service_client = std::make_unique(configuration.connection_url, managed_identity_credential);
+        }
     }
 
     std::string final_url;
-    size_t pos = connection_url.find('?');
     if (pos != std::string::npos)
     {
         auto url_without_sas = connection_url.substr(0, pos);
@@ -216,7 +226,16 @@ AzureClientPtr StorageAzureConfiguration::createClient(bool is_read_only, bool a
         if (storage_shared_key_credential)
             result = std::make_unique(final_url, storage_shared_key_credential);
         else
-            result = std::make_unique(final_url, managed_identity_credential);
+        {
+            /// If connection_url does not have '?', then it's not SAS
+            if (pos == std::string::npos)
+            {
+                auto workload_identity_credential = std::make_shared();
+                result = std::make_unique(final_url, workload_identity_credential);
+            }
+            else
+                result = std::make_unique(final_url, managed_identity_credential);
+        }
     }
     else
     {
@@ -236,7 +255,16 @@ AzureClientPtr StorageAzureConfiguration::createClient(bool is_read_only, bool a
         if (storage_shared_key_credential)
             result = std::make_unique(final_url, storage_shared_key_credential);
        else
-            result = std::make_unique(final_url, managed_identity_credential);
+        {
+            /// If connection_url does not have '?', then it's not SAS
+            if (pos == std::string::npos)
+            {
+                auto workload_identity_credential = std::make_shared();
+                result = std::make_unique(final_url, workload_identity_credential);
+            }
+            else
+                result = std::make_unique(final_url, managed_identity_credential);
+        }
     }
     else
     {

From d4b71ea4cbacb614b35ac6cd3fd07a0c299e3415 Mon Sep 17 00:00:00 2001
From: Anton Popov
Date: Fri, 28 Jun 2024 23:09:08 +0000
Subject: [PATCH 059/140] fix settings changes

---
 src/Core/SettingsChangesHistory.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h
index c8ddf23ba08..a4883e3f209 100644
--- a/src/Core/SettingsChangesHistory.h
+++ b/src/Core/SettingsChangesHistory.h
@@ -87,6 +87,7 @@ namespace SettingsChangesHistory
 static const std::map settings_changes_history =
 {
     {"24.7", 
{{"output_format_parquet_write_page_index", false, true, "Add a possibility to write page index into parquet files."}, + {"optimize_functions_to_subcolumns", false, true, "Enable optimization by default"}, }}, {"24.6", {{"materialize_skip_indexes_on_insert", true, true, "Added new setting to allow to disable materialization of skip indexes on insert"}, {"materialize_statistics_on_insert", true, true, "Added new setting to allow to disable materialization of statistics on insert"}, @@ -153,7 +154,6 @@ static const std::map Date: Sat, 29 Jun 2024 01:38:01 +0000 Subject: [PATCH 060/140] unload primary key of oudated parts --- src/Storages/MergeTree/MergeTreeData.cpp | 25 +++++++++++++++ src/Storages/MergeTree/MergeTreeData.h | 7 ++++ .../ReplicatedMergeTreeCleanupThread.cpp | 2 ++ src/Storages/StorageMergeTree.cpp | 1 + ...3198_unload_primary_key_outdated.reference | 3 ++ .../03198_unload_primary_key_outdated.sh | 32 +++++++++++++++++++ 6 files changed, 70 insertions(+) create mode 100644 tests/queries/0_stateless/03198_unload_primary_key_outdated.reference create mode 100755 tests/queries/0_stateless/03198_unload_primary_key_outdated.sh diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index f9cc65871fe..f02531c83a8 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -8606,6 +8606,31 @@ void MergeTreeData::unloadPrimaryKeys() } } +size_t MergeTreeData::unloadPrimaryKeysOfOutdatedParts() +{ + size_t total_unloaded = 0; + + /// If the method is already called from another thread, then we don't need to do anything. + std::unique_lock lock(unload_primary_key_mutex, std::defer_lock); + if (!lock.try_lock()) + return total_unloaded; + + auto parts_lock = lockParts(); + auto parts_range = getDataPartsStateRange(DataPartState::Outdated); + + for (const auto & part : parts_range) + { + /// Outdated part may be hold by SELECT query and still needs the index. + if (part.unique()) + { + ++total_unloaded; + const_cast(*part).unloadIndex(); + LOG_TEST(log, "Unloaded primary key for outdated part {}", part->name); + } + } + return total_unloaded; +} + void MergeTreeData::verifySortingKey(const KeyDescription & sorting_key) { /// Aggregate functions already forbidden, but SimpleAggregateFunction are not diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index c6f736a4afd..9ca2ea4f312 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -1096,8 +1096,13 @@ public: static VirtualColumnsDescription createVirtuals(const StorageInMemoryMetadata & metadata); + /// Unloads primary keys of all parts. void unloadPrimaryKeys(); + /// Unloads primary keys of outdated parts that are not used by any query. + /// Returns the number of parts for which index was unloaded. + size_t unloadPrimaryKeysOfOutdatedParts(); + protected: friend class IMergeTreeDataPart; friend class MergeTreeDataMergerMutator; @@ -1260,6 +1265,8 @@ protected: std::mutex grab_old_parts_mutex; /// The same for clearOldTemporaryDirectories. std::mutex clear_old_temporary_directories_mutex; + /// The same for unloadPrimaryKeysOfOutdatedParts. 
+ std::mutex unload_primary_key_mutex; void checkProperties( const StorageInMemoryMetadata & new_metadata, diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp index e034918ef57..328c03a5b94 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp @@ -175,6 +175,8 @@ Float32 ReplicatedMergeTreeCleanupThread::iterate() cleaned_part_like += storage.clearEmptyParts(); } + cleaned_part_like += storage.unloadPrimaryKeysOfOutdatedParts(); + /// We need to measure the number of removed objects somehow (for better scheduling), /// but just summing the number of removed async blocks, logs, and empty parts does not make any sense. /// So we are trying to (approximately) measure the number of inserted blocks/parts, so we will be able to compare apples to apples. diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 9255ee00340..94d7a33d0dd 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -1457,6 +1457,7 @@ bool StorageMergeTree::scheduleDataProcessingJob(BackgroundJobsAssignee & assign cleared_count += clearOldPartsFromFilesystem(); cleared_count += clearOldMutations(); cleared_count += clearEmptyParts(); + cleared_count += unloadPrimaryKeysOfOutdatedParts(); return cleared_count; /// TODO maybe take into account number of cleared objects when calculating backoff }, common_assignee_trigger, getStorageID()), /* need_trigger */ false); diff --git a/tests/queries/0_stateless/03198_unload_primary_key_outdated.reference b/tests/queries/0_stateless/03198_unload_primary_key_outdated.reference new file mode 100644 index 00000000000..28655f938ba --- /dev/null +++ b/tests/queries/0_stateless/03198_unload_primary_key_outdated.reference @@ -0,0 +1,3 @@ +all_1_1_0 1 16 +all_1_1_0 0 0 +all_1_1_0_2 1 16 diff --git a/tests/queries/0_stateless/03198_unload_primary_key_outdated.sh b/tests/queries/0_stateless/03198_unload_primary_key_outdated.sh new file mode 100755 index 00000000000..4f217935123 --- /dev/null +++ b/tests/queries/0_stateless/03198_unload_primary_key_outdated.sh @@ -0,0 +1,32 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT -n " + DROP TABLE IF EXISTS t_unload_primary_key; + + CREATE TABLE t_unload_primary_key (a UInt64, b UInt64) + ENGINE = MergeTree ORDER BY a + SETTINGS old_parts_lifetime = 10000; + + INSERT INTO t_unload_primary_key VALUES (1, 1); + + SELECT name, active, primary_key_bytes_in_memory FROM system.parts WHERE database = '$CLICKHOUSE_DATABASE' AND table = 't_unload_primary_key' ORDER BY name; + + ALTER TABLE t_unload_primary_key UPDATE b = 100 WHERE 1 SETTINGS mutations_sync = 2; +" + +for _ in {1..100}; do + res=$($CLICKHOUSE_CLIENT -q "SELECT primary_key_bytes_in_memory FROM system.parts WHERE database = '$CLICKHOUSE_DATABASE' AND table = 't_unload_primary_key' AND name = 'all_1_1_0'") + if [[ $res -eq 0 ]]; then + break + fi + sleep 0.3 +done + +$CLICKHOUSE_CLIENT -n " + SELECT name, active, primary_key_bytes_in_memory FROM system.parts WHERE database = '$CLICKHOUSE_DATABASE' AND table = 't_unload_primary_key' ORDER BY name; + DROP TABLE IF EXISTS t_unload_primary_key; +" From db0beabb1c5e424b47f5031380aba1f946afbe57 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Sat, 29 Jun 2024 02:31:24 +0000 Subject: [PATCH 061/140] less load under mutex --- src/Storages/MergeTree/MergeTreeData.cpp | 29 ++++++++++++++---------- 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index f02531c83a8..4add66e1ecd 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -8608,27 +8608,32 @@ void MergeTreeData::unloadPrimaryKeys() size_t MergeTreeData::unloadPrimaryKeysOfOutdatedParts() { - size_t total_unloaded = 0; - /// If the method is already called from another thread, then we don't need to do anything. std::unique_lock lock(unload_primary_key_mutex, std::defer_lock); if (!lock.try_lock()) - return total_unloaded; + return 0; - auto parts_lock = lockParts(); - auto parts_range = getDataPartsStateRange(DataPartState::Outdated); + DataPartsVector parts_to_unload_index; - for (const auto & part : parts_range) { - /// Outdated part may be hold by SELECT query and still needs the index. - if (part.unique()) + auto parts_lock = lockParts(); + auto parts_range = getDataPartsStateRange(DataPartState::Outdated); + + for (const auto & part : parts_range) { - ++total_unloaded; - const_cast(*part).unloadIndex(); - LOG_TEST(log, "Unloaded primary key for outdated part {}", part->name); + /// Outdated part may be hold by SELECT query and still needs the index. 
+            if (part.unique())
+                parts_to_unload_index.push_back(part);
         }
     }
-    return total_unloaded;
+
+    for (const auto & part : parts_to_unload_index)
+    {
+        const_cast<IMergeTreeDataPart &>(*part).unloadIndex();
+        LOG_TEST(log, "Unloaded primary key for outdated part {}", part->name);
+    }
+
+    return parts_to_unload_index.size();
 }
 
 void MergeTreeData::verifySortingKey(const KeyDescription & sorting_key)
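The point of this patch is to shrink the critical section: candidate parts are only collected while the parts lock is held, and the actual unloadIndex() calls happen after that lock is released. The same pattern can be sketched in isolation as follows — a minimal, hypothetical illustration, not ClickHouse code; all names (PartSet, unloadUnreferenced) are invented for the example:

#include <iostream>
#include <memory>
#include <mutex>
#include <string>
#include <vector>

struct Part
{
    std::string name;
    void unloadIndex() { std::cout << "unloaded " << name << '\n'; }
};

class PartSet
{
public:
    void add(std::shared_ptr<Part> part)
    {
        std::lock_guard lock(parts_mutex);
        parts.push_back(std::move(part));
    }

    /// Returns the number of parts whose index was unloaded.
    size_t unloadUnreferenced()
    {
        /// If another thread is already doing this work, skip it entirely.
        std::unique_lock guard(unload_mutex, std::defer_lock);
        if (!guard.try_lock())
            return 0;

        std::vector<std::shared_ptr<Part>> to_unload;
        {
            /// Under the container lock: only select candidates, do no heavy work.
            std::lock_guard lock(parts_mutex);
            for (const auto & part : parts)
                if (part.use_count() == 1)  /// nobody else holds this part
                    to_unload.push_back(part);
        }

        /// The heavy work happens after parts_mutex has been released.
        for (const auto & part : to_unload)
            part->unloadIndex();

        return to_unload.size();
    }

private:
    std::mutex parts_mutex;
    std::mutex unload_mutex;
    std::vector<std::shared_ptr<Part>> parts;
};

int main()
{
    PartSet set;
    set.add(std::make_shared<Part>(Part{"all_1_1_0"}));
    std::cout << set.unloadUnreferenced() << " part(s) unloaded\n";
}

The try_lock on a dedicated mutex additionally makes the operation cheap under contention: whichever thread wins does the work, and concurrent callers return immediately instead of queueing up.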

From cf2f2cfc1c67c74b0ef95d10b325df95b392beab Mon Sep 17 00:00:00 2001
From: Duc Canh Le
Date: Sat, 29 Jun 2024 14:14:36 +0000
Subject: [PATCH 062/140] use writeBinaryLittleEndian

Signed-off-by: Duc Canh Le
---
 .../RocksDB/EmbeddedRocksDBBulkSink.cpp | 29 +++++++------------
 .../0_stateless/02956_rocksdb_with_ttl.sql | 1 +
 2 files changed, 11 insertions(+), 19 deletions(-)

diff --git a/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.cpp b/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.cpp
index 4b444da97a3..05ab46218af 100644
--- a/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.cpp
+++ b/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.cpp
@@ -170,33 +170,20 @@ std::pair EmbeddedRocksDBBulkSink::seriali
     FormatSettings format_settings; /// Format settings is 1.5KB, so it's not wise to create it for each row
 
     /// TTL handling
-    [[maybe_unused]] auto encode_fixed_32 = [](char * dst, int32_t value)
+    [[maybe_unused]] auto get_rocksdb_ts = [this](String & ts_string)
     {
-        if constexpr (std::endian::native == std::endian::little)
-        {
-            memcpy(dst, &value, sizeof(value));
-        }
-        else
-        {
-            dst[0] = value & 0xff;
-            dst[1] = (value >> 8) & 0xff;
-            dst[2] = (value >> 16) & 0xff;
-            dst[3] = (value >> 24) & 0xff;
-        }
-    };
-    [[maybe_unused]] auto get_rocksdb_ts = [this, &encode_fixed_32](char * ts_string)
-    {
-        int64_t curtime = -1;
+        Int64 curtime = -1;
         auto * system_clock = storage.rocksdb_ptr->GetEnv()->GetSystemClock().get();
         rocksdb::Status st = system_clock->GetCurrentTime(&curtime);
         if (!st.ok())
            throw Exception(ErrorCodes::ROCKSDB_ERROR, "RocksDB error: {}", st.ToString());
-        encode_fixed_32(ts_string, static_cast<Int32>(curtime));
+        WriteBufferFromString buf(ts_string);
+        writeBinaryLittleEndian(static_cast<Int32>(curtime), buf);
     };
 
     for (auto && chunk : input_chunks)
     {
-        [[maybe_unused]] char ts_string[4];
+        [[maybe_unused]] String ts_string;
         if constexpr (with_timestamp)
             get_rocksdb_ts(ts_string);
 
@@ -209,7 +196,11 @@ std::pair EmbeddedRocksDBBulkSink::seriali
 
             /// Append timestamp to end of value, see rocksdb::DBWithTTLImpl::AppendTS
             if constexpr (with_timestamp)
-                writeString(ts_string, 4, writer_value);
+            {
+                if (ts_string.size() != sizeof(Int32))
+                    throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid timestamp size: expect 4, got {}", ts_string.size());
+                writeString(ts_string, writer_value);
+            }
 
             /// String in ColumnString must be null-terminated
             writeChar('\0', writer_key);
diff --git a/tests/queries/0_stateless/02956_rocksdb_with_ttl.sql b/tests/queries/0_stateless/02956_rocksdb_with_ttl.sql
index 88259a38d4c..01efe19cf45 100644
--- a/tests/queries/0_stateless/02956_rocksdb_with_ttl.sql
+++ b/tests/queries/0_stateless/02956_rocksdb_with_ttl.sql
@@ -1,5 +1,6 @@
 -- Tags: no-ordinary-database, use-rocksdb
 
+-- TTL = 2s
 CREATE TABLE dict_with_ttl (key UInt64, value String) ENGINE = EmbeddedRocksDB(2) PRIMARY KEY (key);
 INSERT INTO dict_with_ttl VALUES (0, 'foo');
 -- Data inserted correctly
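For background, the point of an endian-stable encoder such as the `writeBinaryLittleEndian` used above is that the emitted byte order is the same on every host, which a plain `memcpy` only guarantees on little-endian machines. A self-contained sketch of such an encoder (assumed names; this is not the ClickHouse helper itself):

#include <array>
#include <cstdint>
#include <cstdio>

// Always emit the 4 bytes of a 32-bit value in little-endian order,
// regardless of the host byte order, by shifting instead of memcpy.
std::array<unsigned char, 4> encodeLittleEndian32(uint32_t value)
{
    return {
        static_cast<unsigned char>(value & 0xff),
        static_cast<unsigned char>((value >> 8) & 0xff),
        static_cast<unsigned char>((value >> 16) & 0xff),
        static_cast<unsigned char>((value >> 24) & 0xff),
    };
}

int main()
{
    const auto bytes = encodeLittleEndian32(0x01020304);
    for (unsigned char b : bytes)
        std::printf("%02x ", b);  // prints: 04 03 02 01 on any host
    std::printf("\n");
}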

From 238b65cf26255bcc13baba4b5601aa760574c6b1 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Sun, 30 Jun 2024 01:41:39 +0200
Subject: [PATCH 063/140] Fix trash

---
 .../AzureBlobStorage/AzureObjectStorage.cpp | 1 -
 .../ObjectStorages/ObjectStorageIterator.h | 23 ++++++++++++---------
 2 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp
index 86a035f3be7..6c0fc403f04 100644
--- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp
+++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp
@@ -60,7 +60,6 @@ public:
             "ListObjectAzure")
         , client(client_)
     {
-        options.Prefix = path_prefix;
         options.PageSizeHint = static_cast<int>(max_list_size);
     }
 
diff --git a/src/Disks/ObjectStorages/ObjectStorageIterator.h b/src/Disks/ObjectStorages/ObjectStorageIterator.h
index 026b9c4fcd7..d814514ddcc 100644
--- a/src/Disks/ObjectStorages/ObjectStorageIterator.h
+++ b/src/Disks/ObjectStorages/ObjectStorageIterator.h
@@ -12,23 +12,12 @@ public:
     /// Moves iterator to the next element. If the iterator is not valid, the behavior is undefined.
     virtual void next() = 0;
 
-    /// Skips all the remaining elements in the current batch (if any),
-    /// and moves the iterator to the first element of the next batch,
-    /// or, if there are no more batches, the iterator becomes invalid.
-    /// If the iterator is not valid, the behavior is undefined.
-    virtual void nextBatch() = 0;
-
     /// Check if the iterator is valid, which means the `current` method can be called.
     virtual bool isValid() = 0;
 
     /// Return the current element.
     virtual RelativePathWithMetadataPtr current() = 0;
 
-    /// Return the current batch of elements.
-    /// It is unspecified how batches are formed.
-    /// But this method can be used for more efficient processing.
-    virtual RelativePathsWithMetadata currentBatch() = 0;
-
     /// This will initiate prefetching the next batch in background, so it can be obtained faster when needed.
     virtual std::optional getCurrentBatchAndScheduleNext() = 0;
 
@@ -36,6 +25,18 @@ public:
     virtual size_t getAccumulatedSize() const = 0;
 
     virtual ~IObjectStorageIterator() = default;
+
+private:
+    /// Skips all the remaining elements in the current batch (if any),
+    /// and moves the iterator to the first element of the next batch,
+    /// or, if there are no more batches, the iterator becomes invalid.
+    /// If the iterator is not valid, the behavior is undefined.
+    virtual void nextBatch() = 0;
+
+    /// Return the current batch of elements.
+    /// It is unspecified how batches are formed.
+    /// But this method can be used for more efficient processing.
+    virtual RelativePathsWithMetadata currentBatch() = 0;
 };
 
 using ObjectStorageIteratorPtr = std::shared_ptr<IObjectStorageIterator>;

From cbe0eb83dbad9b4b31d886f7ddc2c019a8298989 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Sun, 30 Jun 2024 03:39:14 +0200
Subject: [PATCH 064/140] Fix error

---
 .../ObjectStorageIteratorAsync.cpp | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/src/Disks/ObjectStorages/ObjectStorageIteratorAsync.cpp b/src/Disks/ObjectStorages/ObjectStorageIteratorAsync.cpp
index ff94e878586..2d2e8cd2c1a 100644
--- a/src/Disks/ObjectStorages/ObjectStorageIteratorAsync.cpp
+++ b/src/Disks/ObjectStorages/ObjectStorageIteratorAsync.cpp
@@ -59,11 +59,19 @@ void IObjectStorageIteratorAsync::nextBatch()
         current_batch = std::move(result.batch);
         current_batch_iterator = current_batch.begin();
 
-        accumulated_size.fetch_add(current_batch.size(), std::memory_order_relaxed);
+        if (current_batch.empty())
+        {
+            is_finished = true;
+            has_next_batch = false;
+        }
+        else
+        {
+            accumulated_size.fetch_add(current_batch.size(), std::memory_order_relaxed);
 
-        has_next_batch = result.has_next;
-        if (has_next_batch)
-            outcome_future = scheduleBatch();
+            has_next_batch = result.has_next;
+            if (has_next_batch)
+                outcome_future = scheduleBatch();
+        }
     }
     catch (...)
     {

From 047dcfb8060b41de4f46294f21f99082fc2481a9 Mon Sep 17 00:00:00 2001
From: Duc Canh Le
Date: Mon, 1 Jul 2024 04:39:53 +0000
Subject: [PATCH 065/140] fix undefined LOGICAL_ERROR

Signed-off-by: Duc Canh Le
---
 src/Storages/RocksDB/EmbeddedRocksDBBulkSink.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.cpp b/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.cpp
index 05ab46218af..90792c59d38 100644
--- a/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.cpp
+++ b/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.cpp
@@ -33,6 +33,7 @@ namespace DB
 namespace ErrorCodes
 {
     extern const int ROCKSDB_ERROR;
+    extern const int LOGICAL_ERROR;
 }
 
 static const IColumn::Permutation & getAscendingPermutation(const IColumn & column, IColumn::Permutation & perm)

From 4fcafde610874dc6a29d6d7dbab4dc19e2a42d54 Mon Sep 17 00:00:00 2001
From: taiyang-li <654010905@qq.com>
Date: Mon, 1 Jul 2024 15:29:32 +0800
Subject: [PATCH 066/140] allow empty arguments for concat

---
 src/Functions/concat.cpp | 36 +++++++++----------
 .../0_stateless/00727_concat.reference | 3 ++
 tests/queries/0_stateless/00727_concat.sql | 4 ++-
 3 files changed, 23 insertions(+), 20 deletions(-)

diff --git a/src/Functions/concat.cpp b/src/Functions/concat.cpp
index 68cfcdb8d90..b011c33e02a 100644
--- a/src/Functions/concat.cpp
+++ b/src/Functions/concat.cpp
@@ -46,25 +46,30 @@ public:
 
     DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
     {
-        if (arguments.size() < 2)
-            throw Exception(
-                ErrorCodes::TOO_FEW_ARGUMENTS_FOR_FUNCTION,
-                "Number of arguments for function {} doesn't match: passed {}, should be at least 2",
-                getName(),
-                arguments.size());
+        if (arguments.size() == 1)
+            throw Exception(ErrorCodes::TOO_FEW_ARGUMENTS_FOR_FUNCTION, "Number of arguments for function {} should not be 1", getName());
 
         return std::make_shared<DataTypeString>();
     }
 
     ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
     {
+        if (arguments.empty())
+        {
+            auto res_data = ColumnString::create();
+            res_data->insertDefault();
+            return ColumnConst::create(std::move(res_data), input_rows_count);
+        }
+        else if (arguments.size() == 1)
+            return arguments[0].column;
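The nextBatch() fix above treats an empty batch as the end of listing, so the iterator cannot keep scheduling requests forever. The control flow — fetch a batch, stop on empty, otherwise account for it and prefetch the next one — can be sketched standalone like this (a hypothetical, simplified model with invented names, not the ClickHouse class):

#include <iostream>
#include <vector>

struct ListResult
{
    std::vector<int> batch;
    bool has_next = false;
};

class BatchIterator
{
public:
    explicit BatchIterator(std::vector<ListResult> pages_) : pages(std::move(pages_)) {}

    /// Fetch the next batch; an empty batch terminates iteration
    /// even if the backend claimed more data was available.
    bool nextBatch(std::vector<int> & out)
    {
        if (is_finished || page_index >= pages.size())
            return false;

        const ListResult & result = pages[page_index++];
        if (result.batch.empty())
        {
            is_finished = true;   // empty page == end of listing, do not reschedule
            return false;
        }

        accumulated_size += result.batch.size();
        if (!result.has_next)
            is_finished = true;   // backend said this was the last page

        out = result.batch;
        return true;
    }

    size_t totalSeen() const { return accumulated_size; }

private:
    std::vector<ListResult> pages;
    size_t page_index = 0;
    size_t accumulated_size = 0;
    bool is_finished = false;
};

int main()
{
    // Second page is empty but claims has_next: iteration must still stop.
    BatchIterator it({{{1, 2, 3}, true}, {{}, true}, {{4, 5}, false}});
    std::vector<int> batch;
    while (it.nextBatch(batch))
        std::cout << "batch of " << batch.size() << '\n';
    std::cout << "total: " << it.totalSeen() << '\n';  // total: 3
}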
        /// Format function is not proven to be faster for two arguments.
        /// Actually there is overhead of 2 to 5 extra instructions for each string for checking empty strings in FormatImpl.
        /// Though, benchmarks are really close, for most examples we saw executeBinary is slightly faster (0-3%).
        /// For 3 and more arguments FormatStringImpl is much faster (up to 50-60%).
-        if (arguments.size() == 2)
+        else if (arguments.size() == 2)
             return executeBinary(arguments, input_rows_count);
-
-        return executeFormatImpl(arguments, input_rows_count);
+        else
+            return executeFormatImpl(arguments, input_rows_count);
     }
 
 private:
@@ -209,11 +214,11 @@ public:
     {
         if (arguments.size() == 1)
             return FunctionFactory::instance().getImpl("toString", context)->build(arguments);
-        if (std::ranges::all_of(arguments, [](const auto & elem) { return isArray(elem.type); }))
+        if (!arguments.empty() && std::ranges::all_of(arguments, [](const auto & elem) { return isArray(elem.type); }))
             return FunctionFactory::instance().getImpl("arrayConcat", context)->build(arguments);
-        if (std::ranges::all_of(arguments, [](const auto & elem) { return isMap(elem.type); }))
+        if (!arguments.empty() && std::ranges::all_of(arguments, [](const auto & elem) { return isMap(elem.type); }))
             return FunctionFactory::instance().getImpl("mapConcat", context)->build(arguments);
-        if (std::ranges::all_of(arguments, [](const auto & elem) { return isTuple(elem.type); }))
+        if (!arguments.empty() && std::ranges::all_of(arguments, [](const auto & elem) { return isTuple(elem.type); }))
             return FunctionFactory::instance().getImpl("tupleConcat", context)->build(arguments);
         return std::make_unique(
             FunctionConcat::create(context),
             return_type);
     }
 
-    DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
+    DataTypePtr getReturnTypeImpl(const DataTypes &) const override
     {
-        if (arguments.empty())
-            throw Exception(
-                ErrorCodes::TOO_FEW_ARGUMENTS_FOR_FUNCTION,
-                "Number of arguments for function {} doesn't match: passed {}, should be at least 1.",
-                getName(),
-                arguments.size());
-
         /// We always return Strings from concat, even if arguments were fixed strings.
        return std::make_shared<DataTypeString>();
     }
 
diff --git a/tests/queries/0_stateless/00727_concat.reference b/tests/queries/0_stateless/00727_concat.reference
index 6fb23c072d3..329ad36ad3c 100644
--- a/tests/queries/0_stateless/00727_concat.reference
+++ b/tests/queries/0_stateless/00727_concat.reference
@@ -72,3 +72,6 @@ foo
 \N
 \N
 Testing the alias
+-- Empty argument tests
+
+String
diff --git a/tests/queries/0_stateless/00727_concat.sql b/tests/queries/0_stateless/00727_concat.sql
index 01792545b5a..76dae541261 100644
--- a/tests/queries/0_stateless/00727_concat.sql
+++ b/tests/queries/0_stateless/00727_concat.sql
@@ -93,4 +93,6 @@ SELECT concat(materialize(NULL :: Nullable(UInt64)));
 
 SELECT CONCAT('Testing the ', 'alias');
 
-SELECT concat(); -- { serverError TOO_FEW_ARGUMENTS_FOR_FUNCTION }
+SELECT '-- Empty argument tests';
+SELECT concat();
+select toTypeName(concat());
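The concat change above dispatches on the number of arguments: zero arguments produce a constant empty string, a single argument is passed through unchanged, two arguments take a specialized binary path, and only longer argument lists fall back to the generic implementation. The same dispatch strategy, reduced to plain strings (invented names, not ClickHouse internals):

#include <iostream>
#include <string>
#include <vector>

std::string concatStrings(const std::vector<std::string> & args)
{
    if (args.empty())
        return "";           // zero arguments: constant empty result
    if (args.size() == 1)
        return args[0];      // one argument: pass it through unchanged

    // Generic path for two or more arguments.
    size_t total = 0;
    for (const auto & arg : args)
        total += arg.size();

    std::string result;
    result.reserve(total);   // reserve once to avoid repeated reallocation
    for (const auto & arg : args)
        result += arg;
    return result;
}

int main()
{
    std::cout << '[' << concatStrings({}) << "]\n";                    // []
    std::cout << concatStrings({"Hello"}) << '\n';                     // Hello
    std::cout << concatStrings({"Hello", ", ", "world"}) << '\n';      // Hello, world
}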
"$CURDIR"/../shell_config.sh SESSION_ID_PREFIX="02832_alter_max_sessions_session_$$" +QUERY_ID_PREFIX="02832_alter_max_sessions_query_$$" PROFILE="02832_alter_max_sessions_profile_$$" USER="02832_alter_max_sessions_user_$$" USER2="02832_alter_max_sessions_user_two_$$" @@ -15,6 +17,26 @@ ${CLICKHOUSE_CLIENT} -q $"DROP PROFILE IF EXISTS ${PROFILE}" ${CLICKHOUSE_CLIENT} -q $"CREATE SETTINGS PROFILE ${PROFILE}" ${CLICKHOUSE_CLIENT} -q $"CREATE USER '${USER}' SETTINGS PROFILE '${PROFILE}'" +function run_sessions_set() +{ + local sessions_count="$1" + local session_check="$2" + for ((i = 1 ; i <= ${sessions_count} ; i++)); do + local session_id="${SESSION_ID_PREFIX}_${i}" + local query_id="${QUERY_ID_PREFIX}_${i}" + # Write only expected error text + # More than alter_sessions_count queries will not start. + ${CLICKHOUSE_CURL} -sS -X POST "${CLICKHOUSE_URL}&user=${USER}&query_id=${query_id}&session_id=${session_id}&session_check=${session_check}&session_timeout=600&function_sleep_max_microseconds_per_block=120000000" --data-binary "SELECT sleep(120)" | grep -o -m 1 'USER_SESSION_LIMIT_EXCEEDED' & + done + + for ((i = 1 ; i <= ${sessions_count} ; i++)); do + local query_id="${QUERY_ID_PREFIX}_${i}" + $CLICKHOUSE_CLIENT --query "KILL QUERY WHERE query_id='$query_id' SYNC" >/dev/null + done + + wait +} + function test_alter_profile() { local max_session_count="$1" @@ -24,23 +46,13 @@ function test_alter_profile() ${CLICKHOUSE_CLIENT} -q $"ALTER SETTINGS PROFILE ${PROFILE} SETTINGS max_sessions_for_user = ${max_session_count}" # Create sessions with $max_session_count restriction - for ((i = 1 ; i <= ${max_session_count} ; i++)); do - local session_id="${SESSION_ID_PREFIX}_${i}" - # Skip output from this query - ${CLICKHOUSE_CURL} -sS -X POST "${CLICKHOUSE_URL}&user=${USER}&session_id=${session_id}&session_check=0" --data-binary "SELECT 1" > /dev/null - done + run_sessions_set $max_session_count 0 # Update restriction to $alter_sessions_count ${CLICKHOUSE_CLIENT} -q $"ALTER SETTINGS PROFILE ${PROFILE} SETTINGS max_sessions_for_user = ${alter_sessions_count}" # Simultaneous sessions should use max settings from profile ($alter_sessions_count) - for ((i = 1 ; i <= ${max_session_count} ; i++)); do - local session_id="${SESSION_ID_PREFIX}_${i}" - # ignore select 1, we need only errors - ${CLICKHOUSE_CURL} -sS -X POST "${CLICKHOUSE_URL}&user=${USER}&session_id=${session_id}&session_check=1" --data-binary "select sleep(0.3)" | grep -o -m 1 'USER_SESSION_LIMIT_EXCEEDED' & - done - - wait + run_sessions_set $max_session_count 1 } test_alter_profile 1 1 From bb8fc59d7c3b0dcfb59d1e31bafb11bd302211d7 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Mon, 1 Jul 2024 15:36:08 +0800 Subject: [PATCH 068/140] update doc --- docs/en/sql-reference/functions/string-functions.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/en/sql-reference/functions/string-functions.md b/docs/en/sql-reference/functions/string-functions.md index 31d89060882..6b95796db03 100644 --- a/docs/en/sql-reference/functions/string-functions.md +++ b/docs/en/sql-reference/functions/string-functions.md @@ -734,13 +734,13 @@ concat(s1, s2, ...) **Arguments** -At least one value of arbitrary type. +Values of arbitrary type. Empty argument is also allowed. Arguments which are not of types [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md) are converted to strings using their default serialization. 
 Arguments which are not of types [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md) are converted to strings using their default serialization. As this decreases performance, it is not recommended to use non-String/FixedString arguments.
 
 **Returned values**
 
-The String created by concatenating the arguments.
+The String created by concatenating the arguments. If there are no arguments, the function returns an empty string.
 
 If any of arguments is `NULL`, the function returns `NULL`.

From 0ad049d2e5a2243993144bad7c8c789f961dd765 Mon Sep 17 00:00:00 2001
From: robot-clickhouse
Date: Mon, 1 Jul 2024 08:55:11 +0000
Subject: [PATCH 069/140] Update version_date.tsv and changelogs after v24.6.1.4423-stable

---
 SECURITY.md | 1 +
 docker/keeper/Dockerfile | 2 +-
 docker/server/Dockerfile.alpine | 2 +-
 docker/server/Dockerfile.ubuntu | 2 +-
 docs/changelogs/v24.6.1.4423-stable.md | 735 +++++++++++++++++++++++++
 utils/list-versions/version_date.tsv | 2 +
 6 files changed, 741 insertions(+), 3 deletions(-)
 create mode 100644 docs/changelogs/v24.6.1.4423-stable.md

diff --git a/SECURITY.md b/SECURITY.md
index 8635951dc0e..53328b6e16b 100644
--- a/SECURITY.md
+++ b/SECURITY.md
@@ -14,6 +14,7 @@ The following versions of ClickHouse server are currently supported with securit
 
 | Version | Supported |
 |:-|:-|
+| 24.6 | ✔️ |
 | 24.5 | ✔️ |
 | 24.4 | ✔️ |
 | 24.3 | ✔️ |
diff --git a/docker/keeper/Dockerfile b/docker/keeper/Dockerfile
index 24f38740ff5..018fe57bf56 100644
--- a/docker/keeper/Dockerfile
+++ b/docker/keeper/Dockerfile
@@ -34,7 +34,7 @@ RUN arch=${TARGETARCH:-amd64} \
 # lts / testing / prestable / etc
 ARG REPO_CHANNEL="stable"
 ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
-ARG VERSION="24.5.3.5"
+ARG VERSION="24.6.1.4423"
 ARG PACKAGES="clickhouse-keeper"
 ARG DIRECT_DOWNLOAD_URLS=""
diff --git a/docker/server/Dockerfile.alpine b/docker/server/Dockerfile.alpine
index c71319a2a7e..a86406e5129 100644
--- a/docker/server/Dockerfile.alpine
+++ b/docker/server/Dockerfile.alpine
@@ -32,7 +32,7 @@ RUN arch=${TARGETARCH:-amd64} \
 # lts / testing / prestable / etc
 ARG REPO_CHANNEL="stable"
 ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
-ARG VERSION="24.5.3.5"
+ARG VERSION="24.6.1.4423"
 ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
 ARG DIRECT_DOWNLOAD_URLS=""
diff --git a/docker/server/Dockerfile.ubuntu b/docker/server/Dockerfile.ubuntu
index ed8cf3d657d..25f3273a648 100644
--- a/docker/server/Dockerfile.ubuntu
+++ b/docker/server/Dockerfile.ubuntu
@@ -28,7 +28,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list
 
 ARG REPO_CHANNEL="stable"
 ARG REPOSITORY="deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb ${REPO_CHANNEL} main"
-ARG VERSION="24.5.3.5"
+ARG VERSION="24.6.1.4423"
 ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
 
 #docker-official-library:off
diff --git a/docs/changelogs/v24.6.1.4423-stable.md b/docs/changelogs/v24.6.1.4423-stable.md
new file mode 100644
index 00000000000..f7af9cbaf8d
--- /dev/null
+++ b/docs/changelogs/v24.6.1.4423-stable.md
@@ -0,0 +1,735 @@
+---
+sidebar_position: 1
+sidebar_label: 2024
+---
+
+# 2024 Changelog
+
+### ClickHouse release v24.6.1.4423-stable (dcced7c8478) FIXME as compared to v24.4.1.2088-stable (6d4b31322d1)
+
+#### Backward Incompatible Change
+* Enable asynchronous load of databases and tables by default. See the `async_load_databases` setting in config.xml. While this change is fully compatible, it can introduce a difference in behavior. When `async_load_databases` is false, as in the previous versions, the server will not accept connections until all tables are loaded. When `async_load_databases` is true, as in the new version, the server can accept connections before all the tables are loaded. If a query is made to a table that is not yet loaded, it will wait for the table's loading, which can take considerable time. It can change the behavior of the server if it is part of a large distributed system under a load balancer. In the first case, the load balancer can get a connection refusal and quickly fail over to another server. In the second case, the load balancer can connect to a server that is still loading the tables, and the query will have a higher latency. Moreover, if many queries accumulate in the waiting state, it can lead to a "thundering herd" problem when they start processing simultaneously. This can make a difference only for highly loaded distributed backends. You can set the value of `async_load_databases` to false to avoid this problem. [#57695](https://github.com/ClickHouse/ClickHouse/pull/57695) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Some invalid queries will fail earlier during parsing. Note: disabled the support for inline KQL expressions (the experimental Kusto language) when they are put into a `kql` table function without a string literal, e.g. `kql(garbage | trash)` instead of `kql('garbage | trash')` or `kql($$garbage | trash$$)`. This feature was introduced unintentionally and should not exist. [#61500](https://github.com/ClickHouse/ClickHouse/pull/61500) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Renamed "inverted indexes" to "full-text indexes" which is a less technical / more user-friendly name. This also changes internal table metadata and breaks tables with existing (experimental) inverted indexes. Please make sure to drop such indexes before the upgrade and re-create them after the upgrade. [#62884](https://github.com/ClickHouse/ClickHouse/pull/62884) ([Robert Schulze](https://github.com/rschu1ze)).
+* Usage of functions `neighbor`, `runningAccumulate`, `runningDifferenceStartingWithFirstValue`, `runningDifference` is deprecated (because it is error-prone). Proper window functions should be used instead. To enable them back, set `allow_deprecated_functions=1`. [#63132](https://github.com/ClickHouse/ClickHouse/pull/63132) ([Nikita Taranov](https://github.com/nickitat)).
+* Queries from `system.columns` will work faster if there is a large number of columns, but many databases or tables are not granted for `SHOW TABLES`. Note that in previous versions, if you grant `SHOW COLUMNS` to individual columns without granting `SHOW TABLES` to the corresponding tables, the `system.columns` table will show these columns, but in a new version, it will skip the table entirely. Remove trace log messages "Access granted" and "Access denied" that slowed down queries. [#63439](https://github.com/ClickHouse/ClickHouse/pull/63439) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Rework parallel processing in `Ordered` mode of storage `S3Queue`. This PR is backward incompatible for Ordered mode if you used settings `s3queue_processing_threads_num` or `s3queue_total_shards_num`. Setting `s3queue_total_shards_num` is deleted; previously it was allowed to use only under `s3queue_allow_experimental_sharded_mode`, which is now deprecated. A new setting is added - `s3queue_buckets`. [#64349](https://github.com/ClickHouse/ClickHouse/pull/64349) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* New functions `snowflakeIDToDateTime`, `snowflakeIDToDateTime64`, `dateTimeToSnowflakeID`, and `dateTime64ToSnowflakeID` were added. Unlike the existing functions `snowflakeToDateTime`, `snowflakeToDateTime64`, `dateTimeToSnowflake`, and `dateTime64ToSnowflake`, the new functions are compatible with function `generateSnowflakeID`, i.e. they accept the snowflake IDs generated by `generateSnowflakeID` and produce snowflake IDs of the same type as `generateSnowflakeID` (i.e. `UInt64`). Furthermore, the new functions default to the UNIX epoch (aka. 1970-01-01), just like `generateSnowflakeID`. If necessary, a different epoch, e.g. Twitter's/X's epoch 2010-11-04 aka. 1288834974657 msec since UNIX epoch, can be passed. The old conversion functions are deprecated and will be removed after a transition period: to use them regardless, enable setting `allow_deprecated_snowflake_conversion_functions`. [#64948](https://github.com/ClickHouse/ClickHouse/pull/64948) ([Robert Schulze](https://github.com/rschu1ze)).
+
+#### New Feature
+* Provide support for AzureBlobStorage function in ClickHouse server to use Azure Workload identity to authenticate against Azure blob storage. If `use_workload_identity` parameter is set in config, [workload identity](https://github.com/Azure/azure-sdk-for-cpp/tree/main/sdk/identity/azure-identity#authenticate-azure-hosted-applications) is used for authentication. [#57881](https://github.com/ClickHouse/ClickHouse/pull/57881) ([Vinay Suryadevara](https://github.com/vinay92-ch)).
+* Introduce bulk loading to StorageEmbeddedRocksDB by creating and ingesting SST file instead of relying on RocksDB's built-in memtable. This helps to increase importing speed, especially for long-running insert queries to StorageEmbeddedRocksDB tables. Also, introduce `StorageEmbeddedRocksDB` table settings. [#59163](https://github.com/ClickHouse/ClickHouse/pull/59163) ([Duc Canh Le](https://github.com/canhld94)).
+* Introduce statistics of type "number of distinct values". [#59357](https://github.com/ClickHouse/ClickHouse/pull/59357) ([Han Fei](https://github.com/hanfei1991)).
+* User can now parse CRLF with TSV format using a setting `input_format_tsv_crlf_end_of_line`. Closes [#56257](https://github.com/ClickHouse/ClickHouse/issues/56257). [#59747](https://github.com/ClickHouse/ClickHouse/pull/59747) ([Shaun Struwig](https://github.com/Blargian)).
+* Add Hilbert Curve encode and decode functions. [#60156](https://github.com/ClickHouse/ClickHouse/pull/60156) ([Artem Mustafin](https://github.com/Artemmm91)).
+* Adds the Form Format to read/write a single record in the application/x-www-form-urlencoded format. [#60199](https://github.com/ClickHouse/ClickHouse/pull/60199) ([Shaun Struwig](https://github.com/Blargian)).
+* Added possibility to compress in CROSS JOIN. [#60459](https://github.com/ClickHouse/ClickHouse/pull/60459) ([p1rattttt](https://github.com/p1rattttt)).
+* New setting `input_format_force_null_for_omitted_fields` that forces NULL values for omitted fields. [#60887](https://github.com/ClickHouse/ClickHouse/pull/60887) ([Constantine Peresypkin](https://github.com/pkit)).
+* Support join with inequality conditions which involve columns from both left and right table. e.g. `t1.y < t2.y`. To enable, `SET allow_experimental_join_condition = 1`. [#60920](https://github.com/ClickHouse/ClickHouse/pull/60920) ([lgbo](https://github.com/lgbo-ustc)).
+* Earlier our s3 storage and s3 table function didn't support selecting from archive files. I created a solution that allows iterating over files inside archives in S3. [#62259](https://github.com/ClickHouse/ClickHouse/pull/62259) ([Daniil Ivanik](https://github.com/divanik)).
+* Support for conditional function `clamp`. [#62377](https://github.com/ClickHouse/ClickHouse/pull/62377) ([skyoct](https://github.com/skyoct)).
+* Add npy output format. [#62430](https://github.com/ClickHouse/ClickHouse/pull/62430) ([豪肥肥](https://github.com/HowePa)).
+* Added support for reading LINESTRING geometry in WKT format using function `readWKTLineString`. [#62519](https://github.com/ClickHouse/ClickHouse/pull/62519) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
+* Added SQL functions `generateUUIDv7`, `generateUUIDv7ThreadMonotonic`, `generateUUIDv7NonMonotonic` (with different monotonicity/performance trade-offs) to generate version 7 UUIDs aka. timestamp-based UUIDs with random component. Also added a new function `UUIDToNum` to extract bytes from a UUID and a new function `UUIDv7ToDateTime` to extract timestamp component from a UUID version 7. [#62852](https://github.com/ClickHouse/ClickHouse/pull/62852) ([Alexey Petrunyaka](https://github.com/pet74alex)).
+* Implement Dynamic data type that allows to store values of any type inside it without knowing all of them in advance. Dynamic type is available under a setting `allow_experimental_dynamic_type`. [#63058](https://github.com/ClickHouse/ClickHouse/pull/63058) ([Kruglov Pavel](https://github.com/Avogar)).
+* Allow to attach parts from a different disk. [#63087](https://github.com/ClickHouse/ClickHouse/pull/63087) ([Unalian](https://github.com/Unalian)).
+* Allow proxy to be bypassed for hosts specified in `no_proxy` env variable and ClickHouse proxy configuration. [#63314](https://github.com/ClickHouse/ClickHouse/pull/63314) ([Arthur Passos](https://github.com/arthurpassos)).
+* Introduce bulk loading to StorageEmbeddedRocksDB by creating and ingesting SST file instead of relying on RocksDB's built-in memtable. This helps to increase importing speed, especially for long-running insert queries to StorageEmbeddedRocksDB tables. Also, introduce StorageEmbeddedRocksDB table settings. [#63324](https://github.com/ClickHouse/ClickHouse/pull/63324) ([Duc Canh Le](https://github.com/canhld94)).
+* Raw as a synonym for TSVRaw. [#63394](https://github.com/ClickHouse/ClickHouse/pull/63394) ([Unalian](https://github.com/Unalian)).
+* Added possibility to do cross join in temporary file if size exceeds limits. [#63432](https://github.com/ClickHouse/ClickHouse/pull/63432) ([p1rattttt](https://github.com/p1rattttt)).
+* Added a new table function `loop` to support returning query results in an infinite loop. [#63452](https://github.com/ClickHouse/ClickHouse/pull/63452) ([Sariel](https://github.com/sarielwxm)).
+* Added new SQL functions `generateSnowflakeID` for generating Twitter-style Snowflake IDs. [#63577](https://github.com/ClickHouse/ClickHouse/pull/63577) ([Danila Puzov](https://github.com/kazalika)).
+* Add the ability to reshuffle rows during insert to optimize for size without violating the order set by `PRIMARY KEY`. It's controlled by the setting `optimize_row_order` (off by default). [#63578](https://github.com/ClickHouse/ClickHouse/pull/63578) ([Igor Markelov](https://github.com/ElderlyPassionFruit)).
+* On Linux and MacOS, if the program has STDOUT redirected to a file with a compression extension, use the corresponding compression method instead of nothing (making it behave similarly to `INTO OUTFILE`). [#63662](https://github.com/ClickHouse/ClickHouse/pull/63662) ([v01dXYZ](https://github.com/v01dXYZ)).
+* Added `merge_workload` and `mutation_workload` settings to regulate how resources are utilized and shared between merges, mutations and other workloads. [#64061](https://github.com/ClickHouse/ClickHouse/pull/64061) ([Sergei Trifonov](https://github.com/serxa)).
+* Change warning on high number of attached tables to differentiate tables, views and dictionaries. [#64180](https://github.com/ClickHouse/ClickHouse/pull/64180) ([Francisco J. Jurado Moreno](https://github.com/Beetelbrox)).
+* Add support for comparing IPv4 and IPv6 types using the `=` operator. [#64292](https://github.com/ClickHouse/ClickHouse/pull/64292) ([Francisco J. Jurado Moreno](https://github.com/Beetelbrox)).
+* Allow to store named collections in zookeeper. [#64574](https://github.com/ClickHouse/ClickHouse/pull/64574) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Support decimal arguments in binary math functions (pow(), atan2(), max2(), min2(), hypot()). [#64582](https://github.com/ClickHouse/ClickHouse/pull/64582) ([Mikhail Gorshkov](https://github.com/mgorshkov)).
+* Add support for index analysis over `hilbertEncode`. [#64662](https://github.com/ClickHouse/ClickHouse/pull/64662) ([Artem Mustafin](https://github.com/Artemmm91)).
+* Added SQL functions `parseReadableSize` (along with `OrNull` and `OrZero` variants). [#64742](https://github.com/ClickHouse/ClickHouse/pull/64742) ([Francisco J. Jurado Moreno](https://github.com/Beetelbrox)).
+* Add server settings `max_table_num_to_throw` and `max_database_num_to_throw` to limit the number of databases or tables on `CREATE` queries. [#64781](https://github.com/ClickHouse/ClickHouse/pull/64781) ([Xu Jia](https://github.com/XuJia0210)).
+* Add _time virtual column to file-like storages (s3/file/hdfs/url/azureBlobStorage). [#64947](https://github.com/ClickHouse/ClickHouse/pull/64947) ([Ilya Golshtein](https://github.com/ilejn)).
+* Introduced new functions `base64URLEncode`, `base64URLDecode` and `tryBase64URLDecode`. [#64991](https://github.com/ClickHouse/ClickHouse/pull/64991) ([Mikhail Gorshkov](https://github.com/mgorshkov)).
+* Add new function `editDistanceUTF8`, which calculates the [edit distance](https://en.wikipedia.org/wiki/Edit_distance) between two UTF8 strings. [#65269](https://github.com/ClickHouse/ClickHouse/pull/65269) ([LiuNeng](https://github.com/liuneng1994)).
+
+#### Performance Improvement
+* Skip merging of newly created projection blocks during `INSERT`-s. [#59405](https://github.com/ClickHouse/ClickHouse/pull/59405) ([Nikita Taranov](https://github.com/nickitat)).
+* Add a native parquet reader, which can read parquet binary to ClickHouse Columns directly. It's controlled by the setting `input_format_parquet_use_native_reader` (disabled by default). [#60361](https://github.com/ClickHouse/ClickHouse/pull/60361) ([ZhiHong Zhang](https://github.com/copperybean)).
+* Reduce the number of virtual function calls in ColumnNullable::size(). [#60556](https://github.com/ClickHouse/ClickHouse/pull/60556) ([HappenLee](https://github.com/HappenLee)).
+* Process string functions XXXUTF8 'asciily' if input strings are all ascii chars. Inspired by https://github.com/apache/doris/pull/29799. Overall speed up by 1.07x~1.62x. Notice that peak memory usage had been decreased in some cases. [#61632](https://github.com/ClickHouse/ClickHouse/pull/61632) ([李扬](https://github.com/taiyang-li)).
+* Improved performance of selection (`{}`) globs in StorageS3. [#62120](https://github.com/ClickHouse/ClickHouse/pull/62120) ([Andrey Zvonov](https://github.com/zvonand)).
+* HostResolver kept each IP address several times. If a remote host has several IPs and, for some reason (firewall rules, for example), access is allowed on some IPs and forbidden on others, then only the first record of the forbidden IPs was marked as failed, and on each retry these IPs had a chance to be chosen (and to fail again). Even after fixing this, the DNS cache was dropped every 120 seconds, and the IPs could be chosen again. [#62652](https://github.com/ClickHouse/ClickHouse/pull/62652) ([Anton Ivashkin](https://github.com/ianton-ru)).
+* Speedup `splitByRegexp` when the regular expression argument is a single character. [#62696](https://github.com/ClickHouse/ClickHouse/pull/62696) ([Robert Schulze](https://github.com/rschu1ze)).
+* Speed up FixedHashTable by keeping track of the min and max keys used. This allows to reduce the number of cells that need to be verified. [#62746](https://github.com/ClickHouse/ClickHouse/pull/62746) ([Jiebin Sun](https://github.com/jiebinn)).
+* Add a new configuration `prefer_merge_sort_block_bytes` to control the memory usage and speed up sorting 2 times when merging when there are many columns. [#62904](https://github.com/ClickHouse/ClickHouse/pull/62904) ([LiuNeng](https://github.com/liuneng1994)).
+* `clickhouse-local` will start faster. In previous versions, it was not deleting temporary directories by mistake. Now it will. This closes [#62941](https://github.com/ClickHouse/ClickHouse/issues/62941). [#63074](https://github.com/ClickHouse/ClickHouse/pull/63074) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Micro-optimizations for the new analyzer. [#63429](https://github.com/ClickHouse/ClickHouse/pull/63429) ([Raúl Marín](https://github.com/Algunenano)).
+* Index analysis will work if `DateTime` is compared to `DateTime64`. This closes [#63441](https://github.com/ClickHouse/ClickHouse/issues/63441). [#63443](https://github.com/ClickHouse/ClickHouse/pull/63443) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Index analysis will work if `DateTime` is compared to `DateTime64`. This closes [#63441](https://github.com/ClickHouse/ClickHouse/issues/63441). [#63532](https://github.com/ClickHouse/ClickHouse/pull/63532) ([Raúl Marín](https://github.com/Algunenano)).
+* Optimize the resolution of in(LowCardinality, ConstantSet). [#64060](https://github.com/ClickHouse/ClickHouse/pull/64060) ([Zhiguo Zhou](https://github.com/ZhiguoZh)).
+* Speed up indices of type `set` a little (around 1.5 times) by removing garbage. [#64098](https://github.com/ClickHouse/ClickHouse/pull/64098) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Use a thread pool to initialize and destroy hash tables inside `ConcurrentHashJoin`. [#64241](https://github.com/ClickHouse/ClickHouse/pull/64241) ([Nikita Taranov](https://github.com/nickitat)).
+* Optimized vertical merges in tables with sparse columns. [#64311](https://github.com/ClickHouse/ClickHouse/pull/64311) ([Anton Popov](https://github.com/CurtizJ)).
+* Enabled prefetches of data from remote filesystem during vertical merges. It improves latency of vertical merges in tables with data stored on remote filesystem. [#64314](https://github.com/ClickHouse/ClickHouse/pull/64314) ([Anton Popov](https://github.com/CurtizJ)).
+* Reduce redundant calls to `isDefault()` of `ColumnSparse::filter` to improve performance. [#64426](https://github.com/ClickHouse/ClickHouse/pull/64426) ([Jiebin Sun](https://github.com/jiebinn)).
+* Speedup `find_super_nodes` and `find_big_family` keeper-client commands by making multiple asynchronous getChildren requests. [#64628](https://github.com/ClickHouse/ClickHouse/pull/64628) ([Alexander Gololobov](https://github.com/davenger)).
+* Improve function least/greatest for nullable numeric type arguments. [#64668](https://github.com/ClickHouse/ClickHouse/pull/64668) ([KevinyhZou](https://github.com/KevinyhZou)).
+* Allow merging two consecutive `FilterSteps` of a query plan. This improves filter-push-down optimization if the filter condition can be pushed down from the parent step. [#64760](https://github.com/ClickHouse/ClickHouse/pull/64760) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Remove bad optimization in vertical final implementation and re-enable vertical final algorithm by default. [#64783](https://github.com/ClickHouse/ClickHouse/pull/64783) ([Duc Canh Le](https://github.com/canhld94)).
+* Remove ALIAS nodes from the filter expression. This slightly improves performance for queries with `PREWHERE` (with new analyzer). [#64793](https://github.com/ClickHouse/ClickHouse/pull/64793) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Fix performance regression in cross join introduced in [#60459](https://github.com/ClickHouse/ClickHouse/issues/60459) (24.5). [#65243](https://github.com/ClickHouse/ClickHouse/pull/65243) ([Nikita Taranov](https://github.com/nickitat)).
+
+#### Improvement
+* Support empty tuples. [#55061](https://github.com/ClickHouse/ClickHouse/pull/55061) ([Amos Bird](https://github.com/amosbird)).
+* Hot reload storage policy for distributed tables when adding a new disk. [#58285](https://github.com/ClickHouse/ClickHouse/pull/58285) ([Duc Canh Le](https://github.com/canhld94)).
+* Maps can now have `Float32`, `Float64`, `Array(T)`, `Map(K,V)` and `Tuple(T1, T2, ...)` as keys. Closes [#54537](https://github.com/ClickHouse/ClickHouse/issues/54537). [#59318](https://github.com/ClickHouse/ClickHouse/pull/59318) ([李扬](https://github.com/taiyang-li)).
+* Avoid possible deadlock during MergeTree index analysis when scheduling threads in a saturated service. [#59427](https://github.com/ClickHouse/ClickHouse/pull/59427) ([Sean Haynes](https://github.com/seandhaynes)).
+* Multiline strings with border preservation and column width change. [#59940](https://github.com/ClickHouse/ClickHouse/pull/59940) ([Volodyachan](https://github.com/Volodyachan)).
+* Make rabbitmq nack broken messages. Closes [#45350](https://github.com/ClickHouse/ClickHouse/issues/45350). [#60312](https://github.com/ClickHouse/ClickHouse/pull/60312) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Support partial trivial count optimization when the query filter is able to select exact ranges from merge tree tables. [#60463](https://github.com/ClickHouse/ClickHouse/pull/60463) ([Amos Bird](https://github.com/amosbird)).
+* Fix a crash in asynchronous stack unwinding (such as when using the sampling query profiler) while interpreting debug info. This closes [#60460](https://github.com/ClickHouse/ClickHouse/issues/60460). [#60468](https://github.com/ClickHouse/ClickHouse/pull/60468) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Reduce max memory usage of multithreaded `INSERT`s by collecting chunks of multiple threads in a single transform. [#61047](https://github.com/ClickHouse/ClickHouse/pull/61047) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
+* Distinct messages for s3 error 'no key' for the cases of disk and storage. [#61108](https://github.com/ClickHouse/ClickHouse/pull/61108) ([Sema Checherinda](https://github.com/CheSema)).
+* Less contention in filesystem cache (part 4). Allow to keep filesystem cache not filled to the limit by doing additional eviction in the background (controlled by `keep_free_space_size(elements)_ratio`). This allows to release pressure from space reservation for queries (on `tryReserve` method). Also this is done in a lock free way as much as possible, e.g. should not block normal cache usage. [#61250](https://github.com/ClickHouse/ClickHouse/pull/61250) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* The progress bar will work for trivial queries with LIMIT from `system.zeros`, `system.zeros_mt` (it already works for `system.numbers` and `system.numbers_mt`), and the `generateRandom` table function. As a bonus, if the total number of records is greater than the `max_rows_to_read` limit, it will throw an exception earlier. This closes [#58183](https://github.com/ClickHouse/ClickHouse/issues/58183). [#61823](https://github.com/ClickHouse/ClickHouse/pull/61823) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* YAML Merge Key support. [#62685](https://github.com/ClickHouse/ClickHouse/pull/62685) ([Azat Khuzhin](https://github.com/azat)).
+* Enhance error message when non-deterministic function is used with Replicated source. [#62896](https://github.com/ClickHouse/ClickHouse/pull/62896) ([Grégoire Pineau](https://github.com/lyrixx)).
+* Fix interserver secret for Distributed over Distributed from `remote`. [#63013](https://github.com/ClickHouse/ClickHouse/pull/63013) ([Azat Khuzhin](https://github.com/azat)).
+* Allow using `clickhouse-local` and its shortcuts `clickhouse` and `ch` with a query or queries file as a positional argument. Examples: `ch "SELECT 1"`, `ch --param_test Hello "SELECT {test:String}"`, `ch query.sql`. This closes [#62361](https://github.com/ClickHouse/ClickHouse/issues/62361). [#63081](https://github.com/ClickHouse/ClickHouse/pull/63081) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Support configuration substitutions from YAML files. [#63106](https://github.com/ClickHouse/ClickHouse/pull/63106) ([Eduard Karacharov](https://github.com/korowa)).
+* Reduce the memory usage when using Azure object storage by using fixed memory allocation, avoiding the allocation of an extra buffer. [#63160](https://github.com/ClickHouse/ClickHouse/pull/63160) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)).
+* Add TTL information in the `system.parts_columns` table. [#63200](https://github.com/ClickHouse/ClickHouse/pull/63200) ([litlig](https://github.com/litlig)).
+* Keep previous data in terminal after picking from skim suggestions. [#63261](https://github.com/ClickHouse/ClickHouse/pull/63261) ([FlameFactory](https://github.com/FlameFactory)).
+* Width of fields is now correctly calculated, ignoring ANSI escape sequences. [#63270](https://github.com/ClickHouse/ClickHouse/pull/63270) ([Shaun Struwig](https://github.com/Blargian)).
+* Enable plain_rewritable metadata for local and Azure (azure_blob_storage) object storages. [#63365](https://github.com/ClickHouse/ClickHouse/pull/63365) ([Julia Kartseva](https://github.com/jkartseva)).
+* Support English-style Unicode quotes, e.g. “Hello”, ‘world’. This is questionable in general but helpful when you type your query in a word processor, such as Google Docs. This closes [#58634](https://github.com/ClickHouse/ClickHouse/issues/58634). [#63381](https://github.com/ClickHouse/ClickHouse/pull/63381) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Allowed to create MaterializedMySQL database without connection to MySQL. [#63397](https://github.com/ClickHouse/ClickHouse/pull/63397) ([Kirill](https://github.com/kirillgarbar)).
+* Remove copying data when writing to filesystem cache. [#63401](https://github.com/ClickHouse/ClickHouse/pull/63401) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Update the usage of error code `NUMBER_OF_ARGUMENTS_DOESNT_MATCH` by more accurate error codes when appropriate. [#63406](https://github.com/ClickHouse/ClickHouse/pull/63406) ([Yohann Jardin](https://github.com/yohannj)).
+* Several minor corner case fixes to proxy support & tunneling. [#63427](https://github.com/ClickHouse/ClickHouse/pull/63427) ([Arthur Passos](https://github.com/arthurpassos)).
+* `os_user` and `client_hostname` are now correctly set up for queries for command line suggestions in clickhouse-client. This closes [#63430](https://github.com/ClickHouse/ClickHouse/issues/63430). [#63433](https://github.com/ClickHouse/ClickHouse/pull/63433) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Fixed tabulation from line numbering, correct handling of length when moving a line if the value has a tab, added tests. [#63493](https://github.com/ClickHouse/ClickHouse/pull/63493) ([Volodyachan](https://github.com/Volodyachan)).
+* Add the `aggregate_function_group_array_has_limit_size` setting to support discarding data in some scenarios. [#63516](https://github.com/ClickHouse/ClickHouse/pull/63516) ([zhongyuankai](https://github.com/zhongyuankai)).
+* Automatically mark a replica of Replicated database as lost and start recovery if some DDL task fails more than `max_retries_before_automatic_recovery` (100 by default) times in a row with the same error. Also, fixed a bug that could cause skipping DDL entries when an exception is thrown during an early stage of entry execution. [#63549](https://github.com/ClickHouse/ClickHouse/pull/63549) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* Add `http_response_headers` setting to support custom response headers in custom HTTP handlers. [#63562](https://github.com/ClickHouse/ClickHouse/pull/63562) ([Grigorii](https://github.com/GSokol)).
+* Automatically correct `max_block_size=0` to the default value. [#63587](https://github.com/ClickHouse/ClickHouse/pull/63587) ([Antonio Andelic](https://github.com/antonio2368)).
+* Account failed files in `s3queue_tracked_file_ttl_sec` and `s3queue_traked_files_limit` for `StorageS3Queue`. [#63638](https://github.com/ClickHouse/ClickHouse/pull/63638) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Add a build_id ALIAS column to trace_log to facilitate auto renaming upon detecting binary changes. This is to address [#52086](https://github.com/ClickHouse/ClickHouse/issues/52086). [#63656](https://github.com/ClickHouse/ClickHouse/pull/63656) ([Zimu Li](https://github.com/woodlzm)).
+* Enable truncate operation for object storage disks. [#63693](https://github.com/ClickHouse/ClickHouse/pull/63693) ([MikhailBurdukov](https://github.com/MikhailBurdukov)).
+* Improve io_uring resubmits visibility. Rename profile event `IOUringSQEsResubmits` -> `IOUringSQEsResubmitsAsync` and add a new one `IOUringSQEsResubmitsSync`. [#63699](https://github.com/ClickHouse/ClickHouse/pull/63699) ([Tomer Shafir](https://github.com/tomershafir)).
+* Introduce assertions to verify all functions are called with columns of the right size. [#63723](https://github.com/ClickHouse/ClickHouse/pull/63723) ([Raúl Marín](https://github.com/Algunenano)).
+* The loading of the keywords list is now dependent on the server revision and will be disabled for the old versions of ClickHouse server. CC @azat. [#63786](https://github.com/ClickHouse/ClickHouse/pull/63786) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
+* `SHOW CREATE TABLE` executed on top of system tables will now show the super handy comment unique for each table which will explain why this table is needed. [#63788](https://github.com/ClickHouse/ClickHouse/pull/63788) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
+* Allow trailing commas in the columns list in the INSERT query. For example, `INSERT INTO test (a, b, c, ) VALUES ...`. [#63803](https://github.com/ClickHouse/ClickHouse/pull/63803) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Better exception messages for the `Regexp` format. [#63804](https://github.com/ClickHouse/ClickHouse/pull/63804) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Allow trailing commas in the `Values` format. For example, this query is allowed: `INSERT INTO test (a, b, c) VALUES (4, 5, 6,);`. [#63810](https://github.com/ClickHouse/ClickHouse/pull/63810) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* ClickHouse disks have to read the server setting to obtain the actual metadata format version. [#63831](https://github.com/ClickHouse/ClickHouse/pull/63831) ([Sema Checherinda](https://github.com/CheSema)).
+* Disable pretty format restrictions (`output_format_pretty_max_rows`/`output_format_pretty_max_value_width`) when stdout is not a TTY. [#63942](https://github.com/ClickHouse/ClickHouse/pull/63942) ([Azat Khuzhin](https://github.com/azat)).
+* Exception handling now works when ClickHouse is used inside AWS Lambda. Author: [Alexey Coolnev](https://github.com/acoolnev). [#64014](https://github.com/ClickHouse/ClickHouse/pull/64014) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Throw `CANNOT_DECOMPRESS` instead of `CORRUPTED_DATA` on invalid compressed data passed via HTTP. [#64036](https://github.com/ClickHouse/ClickHouse/pull/64036) ([vdimir](https://github.com/vdimir)).
+* A tip for a single large number in Pretty formats now works for Nullable and LowCardinality. This closes [#61993](https://github.com/ClickHouse/ClickHouse/issues/61993). [#64084](https://github.com/ClickHouse/ClickHouse/pull/64084) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Now backups with azure blob storage will use multicopy. [#64116](https://github.com/ClickHouse/ClickHouse/pull/64116) ([alesapin](https://github.com/alesapin)).
+* Added a new setting, `metadata_keep_free_space_bytes` to keep free space on the metadata storage disk. [#64128](https://github.com/ClickHouse/ClickHouse/pull/64128) ([MikhailBurdukov](https://github.com/MikhailBurdukov)).
+* Add metrics, logs, and thread names around parts filtering with indices. [#64130](https://github.com/ClickHouse/ClickHouse/pull/64130) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Allow to use native copy for azure even with different containers. [#64154](https://github.com/ClickHouse/ClickHouse/pull/64154) ([alesapin](https://github.com/alesapin)).
+* Add metrics to track the number of directories created and removed by the plain_rewritable metadata storage, and the number of entries in the local-to-remote in-memory map. [#64175](https://github.com/ClickHouse/ClickHouse/pull/64175) ([Julia Kartseva](https://github.com/jkartseva)).
+* Finally enable native copy for azure. [#64182](https://github.com/ClickHouse/ClickHouse/pull/64182) ([alesapin](https://github.com/alesapin)).
+* Ignore `allow_suspicious_primary_key` on `ATTACH` and verify on `ALTER`. [#64202](https://github.com/ClickHouse/ClickHouse/pull/64202) ([Azat Khuzhin](https://github.com/azat)).
+* The query cache now considers identical queries with different settings as different. This increases robustness in cases where different settings (e.g. `limit` or `additional_table_filters`) would affect the query result. [#64205](https://github.com/ClickHouse/ClickHouse/pull/64205) ([Robert Schulze](https://github.com/rschu1ze)).
+* Better exception message when deleting a table with projections, so users can understand the error and the steps that should be taken. [#64212](https://github.com/ClickHouse/ClickHouse/pull/64212) ([jsc0218](https://github.com/jsc0218)).
+* Support the non-standard error code `QpsLimitExceeded` in object storage as a retryable error. [#64225](https://github.com/ClickHouse/ClickHouse/pull/64225) ([Sema Checherinda](https://github.com/CheSema)).
+* Forbid converting a MergeTree table to replicated if the zookeeper path for this table already exists. [#64244](https://github.com/ClickHouse/ClickHouse/pull/64244) ([Kirill](https://github.com/kirillgarbar)).
+* If "replica group" is configured for a `Replicated` database, automatically create a cluster that includes replicas from all groups. [#64312](https://github.com/ClickHouse/ClickHouse/pull/64312) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* Added settings to disable materialization of skip indexes and statistics on inserts (`materialize_skip_indexes_on_insert` and `materialize_statistics_on_insert`). [#64391](https://github.com/ClickHouse/ClickHouse/pull/64391) ([Anton Popov](https://github.com/CurtizJ)).
+* Use the allocated memory size to calculate the row group size and reduce the peak memory of the parquet writer in single-threaded mode. [#64424](https://github.com/ClickHouse/ClickHouse/pull/64424) ([LiuNeng](https://github.com/liuneng1994)).
+* Added new configuration input_format_parquet_prefer_block_bytes to control the average output block bytes, and modified the default value of input_format_parquet_max_block_size to 65409. [#64427](https://github.com/ClickHouse/ClickHouse/pull/64427) ([LiuNeng](https://github.com/liuneng1994)).
+* Always start Keeper with a sufficient number of threads in the global thread pool. [#64444](https://github.com/ClickHouse/ClickHouse/pull/64444) ([Duc Canh Le](https://github.com/canhld94)).
+* Settings from user config don't affect merges and mutations for MergeTree on top of object storage. [#64456](https://github.com/ClickHouse/ClickHouse/pull/64456) ([alesapin](https://github.com/alesapin)).
+* Setting `replace_long_file_name_to_hash` is enabled by default for `MergeTree` tables. [#64457](https://github.com/ClickHouse/ClickHouse/pull/64457) ([Anton Popov](https://github.com/CurtizJ)).
+* Improve the iterator of sparse column to reduce calls to `size()`. [#64497](https://github.com/ClickHouse/ClickHouse/pull/64497) ([Jiebin Sun](https://github.com/jiebinn)).
+* Update condition to use copy for azure blob storage. [#64518](https://github.com/ClickHouse/ClickHouse/pull/64518) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)).
+* Support the non-standard error code `TotalQpsLimitExceeded` in object storage as a retryable error. [#64520](https://github.com/ClickHouse/ClickHouse/pull/64520) ([Sema Checherinda](https://github.com/CheSema)).
+* Optimized memory usage of vertical merges for tables with high number of skip indexes. [#64580](https://github.com/ClickHouse/ClickHouse/pull/64580) ([Anton Popov](https://github.com/CurtizJ)).
+* Introduced two additional columns in the `system.query_log`: `used_privileges` and `missing_privileges`. `used_privileges` is populated with the privileges that were checked during query execution, and `missing_privileges` contains required privileges that are missing. [#64597](https://github.com/ClickHouse/ClickHouse/pull/64597) ([Alexey Katsman](https://github.com/alexkats)).
+* Add settings `parallel_replicas_custom_key_range_lower` and `parallel_replicas_custom_key_range_upper` to control how parallel replicas with dynamic shards parallelizes queries when using a range filter. [#64604](https://github.com/ClickHouse/ClickHouse/pull/64604) ([josh-hildred](https://github.com/josh-hildred)).
+* Updated Advanced Dashboard for both open-source and ClickHouse Cloud versions to include a chart for 'Maximum concurrent network connections'. [#64610](https://github.com/ClickHouse/ClickHouse/pull/64610) ([Thom O'Connor](https://github.com/thomoco)).
+* The second argument (scale) of functions `round()`, `roundBankers()`, `floor()`, `ceil()` and `trunc()` can now be non-const. [#64798](https://github.com/ClickHouse/ClickHouse/pull/64798) ([Mikhail Gorshkov](https://github.com/mgorshkov)).
+* Improve progress report on zeros_mt and generateRandom. [#64804](https://github.com/ClickHouse/ClickHouse/pull/64804) ([Raúl Marín](https://github.com/Algunenano)).
+* Add an asynchronous metric jemalloc.profile.active to show whether sampling is currently active. This is an activation mechanism in addition to prof.active; both must be active for the calling thread to sample. [#64842](https://github.com/ClickHouse/ClickHouse/pull/64842) ([Unalian](https://github.com/Unalian)).
+* Support statistics with ReplicatedMergeTree. [#64934](https://github.com/ClickHouse/ClickHouse/pull/64934) ([Han Fei](https://github.com/hanfei1991)).
+* Don't mark `allow_experimental_join_condition` as IMPORTANT. This may have prevented distributed queries in a mixed versions cluster from being executed successfully. [#65008](https://github.com/ClickHouse/ClickHouse/pull/65008) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
+* Backported in [#65716](https://github.com/ClickHouse/ClickHouse/issues/65716): `StorageS3Queue` related fixes and improvements. Deduce a default value of `s3queue_processing_threads_num` according to the number of physical cpu cores on the server (instead of the previous default value as 1). Set default value of `s3queue_loading_retries` to 10. Fix possible vague "Uncaught exception" in exception column of `system.s3queue`. Do not increment retry count on `MEMORY_LIMIT_EXCEEDED` exception. Move files commit to a stage after insertion into the table has fully finished, to avoid files being committed while not inserted. Add settings `s3queue_max_processed_files_before_commit`, `s3queue_max_processed_rows_before_commit`, `s3queue_max_processed_bytes_before_commit`, `s3queue_max_processing_time_sec_before_commit`, to better control commit and flush time. [#65046](https://github.com/ClickHouse/ClickHouse/pull/65046) ([Kseniia Sumarokova](https://github.com/kssenii)).
Add settings `s3queue_max_processed_files_before_commit`, `s3queue_max_processed_rows_before_commit`, `s3queue_max_processed_bytes_before_commit`, `s3queue_max_processing_time_sec_before_commit`, to better control commit and flush time. [#65046](https://github.com/ClickHouse/ClickHouse/pull/65046) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Added server asynchronous metrics `DiskGetObjectThrottler*` and `DiskPutObjectThrottler*` reflecting the requests-per-second rate limits defined with the `s3_max_get_rps` and `s3_max_put_rps` disk settings and the currently available number of requests that could be sent without hitting the throttling limit on the disk. Metrics are defined for every disk that has a configured limit. [#65050](https://github.com/ClickHouse/ClickHouse/pull/65050) ([Sergei Trifonov](https://github.com/serxa)).
+* Added a setting `output_format_pretty_display_footer_column_names` which, when enabled, displays column names at the end of the table for long tables (50 rows by default), with the minimum number of rows controlled by `output_format_pretty_display_footer_column_names_min_rows`. [#65144](https://github.com/ClickHouse/ClickHouse/pull/65144) ([Shaun Struwig](https://github.com/Blargian)).
+* Restored the previous behaviour of how ClickHouse interprets Tuples in CSV format. This change effectively reverts https://github.com/ClickHouse/ClickHouse/pull/60994 and makes it available only under a few settings: `output_format_csv_serialize_tuple_into_separate_columns`, `input_format_csv_deserialize_separate_columns_into_tuple` and `input_format_csv_try_infer_strings_from_quoted_tuples`. [#65170](https://github.com/ClickHouse/ClickHouse/pull/65170) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
+* Initialize the global trace collector for Poco::ThreadPool (needed for Keeper, etc.). [#65239](https://github.com/ClickHouse/ClickHouse/pull/65239) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Add validation when creating a user with `bcrypt_hash`. [#65242](https://github.com/ClickHouse/ClickHouse/pull/65242) ([Raúl Marín](https://github.com/Algunenano)).
+
+#### Critical Bug Fix (crash, LOGICAL_ERROR, data loss, RBAC)
+* Fix a permission error where a user in a specific situation can escalate their privileges on the default database without the necessary grants. [#64769](https://github.com/ClickHouse/ClickHouse/pull/64769) ([pufit](https://github.com/pufit)).
+* Fix crash with `UniqInjectiveFunctionsEliminationPass` and `uniqCombined`. [#65188](https://github.com/ClickHouse/ClickHouse/pull/65188) ([Raúl Marín](https://github.com/Algunenano)).
+* Fix a bug in ClickHouse Keeper that causes a digest mismatch during session closing. [#65198](https://github.com/ClickHouse/ClickHouse/pull/65198) ([Aleksei Filatov](https://github.com/aalexfvk)).
+* Forbid the `QUALIFY` clause in the old analyzer. The old analyzer ignored `QUALIFY`, so it could lead to unexpected data removal in mutations. [#65356](https://github.com/ClickHouse/ClickHouse/pull/65356) ([Dmitry Novik](https://github.com/novikd)).
+* Use correct memory alignment for the Distinct combinator. Previously, a crash could happen because of invalid memory allocation when the combinator was used. [#65379](https://github.com/ClickHouse/ClickHouse/pull/65379) ([Antonio Andelic](https://github.com/antonio2368)).
+* Backported in [#65846](https://github.com/ClickHouse/ClickHouse/issues/65846): Check cyclic dependencies on CREATE/REPLACE/RENAME/EXCHANGE queries and throw an exception if there is a cyclic dependency.
Previously such cyclic dependencies could lead to a deadlock during server startup. Closes [#65355](https://github.com/ClickHouse/ClickHouse/issues/65355). Also fixes some bugs in dependency creation. [#65405](https://github.com/ClickHouse/ClickHouse/pull/65405) ([Kruglov Pavel](https://github.com/Avogar)).
+* Backported in [#65714](https://github.com/ClickHouse/ClickHouse/issues/65714): Fix crash in maxIntersections. [#65689](https://github.com/ClickHouse/ClickHouse/pull/65689) ([Raúl Marín](https://github.com/Algunenano)).
+
+#### Bug Fix (user-visible misbehavior in an official stable release)
+* Fix making backups when multiple shards are used. This PR fixes [#56566](https://github.com/ClickHouse/ClickHouse/issues/56566). [#57684](https://github.com/ClickHouse/ClickHouse/pull/57684) ([Vitaly Baranov](https://github.com/vitlibar)).
+* Fix passing projections/indexes from the CREATE query into the inner table of a materialized view. [#59183](https://github.com/ClickHouse/ClickHouse/pull/59183) ([Azat Khuzhin](https://github.com/azat)).
+* Fix incorrect merge in `boundRatio`. [#60532](https://github.com/ClickHouse/ClickHouse/pull/60532) ([Tao Wang](https://github.com/wangtZJU)).
+* Fix crash when using some functions with low-cardinality columns. [#61966](https://github.com/ClickHouse/ClickHouse/pull/61966) ([Michael Kolupaev](https://github.com/al13n321)).
+* Fixed 'set' skip index not working with IN and indexHint(). [#62083](https://github.com/ClickHouse/ClickHouse/pull/62083) ([Michael Kolupaev](https://github.com/al13n321)).
+* Fix queries with FINAL giving a wrong result when the table does not use adaptive granularity. [#62432](https://github.com/ClickHouse/ClickHouse/pull/62432) ([Duc Canh Le](https://github.com/canhld94)).
+* Improve the detection of the cgroups v2 memory controller in unusual locations. This fixes a warning that the cgroup memory observer was disabled because no cgroups v1 or v2 current memory file could be found. [#62903](https://github.com/ClickHouse/ClickHouse/pull/62903) ([Robert Schulze](https://github.com/rschu1ze)).
+* Fix subsequent use of external tables in the client. [#62964](https://github.com/ClickHouse/ClickHouse/pull/62964) ([Azat Khuzhin](https://github.com/azat)).
+* Fix crash with untuple and an unresolved lambda. [#63131](https://github.com/ClickHouse/ClickHouse/pull/63131) ([Raúl Marín](https://github.com/Algunenano)).
+* Fix a bug which could lead to the server accepting connections before it is actually loaded. [#63181](https://github.com/ClickHouse/ClickHouse/pull/63181) ([alesapin](https://github.com/alesapin)).
+* Fix intersecting parts when restarting after a drop range. [#63202](https://github.com/ClickHouse/ClickHouse/pull/63202) ([Han Fei](https://github.com/hanfei1991)).
+* Fix a misbehavior when SQL security defaults aren't loaded for old tables during server startup. [#63209](https://github.com/ClickHouse/ClickHouse/pull/63209) ([pufit](https://github.com/pufit)).
+* Fix JOIN filter push down for filled JOIN. Closes [#63228](https://github.com/ClickHouse/ClickHouse/issues/63228). [#63234](https://github.com/ClickHouse/ClickHouse/pull/63234) ([Maksim Kita](https://github.com/kitaisreal)).
+* Fix infinite loop while listing objects in Azure Blob Storage. [#63257](https://github.com/ClickHouse/ClickHouse/pull/63257) ([Julia Kartseva](https://github.com/jkartseva)).
+* CROSS JOIN can be executed with any value of the `join_algorithm` setting, close [#62431](https://github.com/ClickHouse/ClickHouse/issues/62431).
[#63273](https://github.com/ClickHouse/ClickHouse/pull/63273) ([vdimir](https://github.com/vdimir)).
+* Fixed a potential crash caused by a `no space left` error when temporary data in the cache is used. [#63346](https://github.com/ClickHouse/ClickHouse/pull/63346) ([vdimir](https://github.com/vdimir)).
+* Fix a bug which could potentially lead to a rare LOGICAL_ERROR during a SELECT query with the message: `Unexpected return type from materialize. Expected type_XXX. Got type_YYY.` Introduced in [#59379](https://github.com/ClickHouse/ClickHouse/issues/59379). [#63353](https://github.com/ClickHouse/ClickHouse/pull/63353) ([alesapin](https://github.com/alesapin)).
+* Fix the `X-ClickHouse-Timezone` header returning a wrong timezone when using `session_timezone` as a query-level setting. [#63377](https://github.com/ClickHouse/ClickHouse/pull/63377) ([Andrey Zvonov](https://github.com/zvonand)).
+* Fix a debug assert when using grouping WITH ROLLUP and LowCardinality types. [#63398](https://github.com/ClickHouse/ClickHouse/pull/63398) ([Raúl Marín](https://github.com/Algunenano)).
+* Fix logical errors in queries with `GROUPING SETS` and `WHERE` and `group_by_use_nulls = true` (see the repro sketch below), close [#60538](https://github.com/ClickHouse/ClickHouse/issues/60538). [#63405](https://github.com/ClickHouse/ClickHouse/pull/63405) ([vdimir](https://github.com/vdimir)).
+* Fix backup of a projection part in case the projection was removed from the table metadata, but the part still has the projection. [#63426](https://github.com/ClickHouse/ClickHouse/pull/63426) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Fix the 'Every derived table must have its own alias' error for the MySQL dictionary source, close [#63341](https://github.com/ClickHouse/ClickHouse/issues/63341). [#63481](https://github.com/ClickHouse/ClickHouse/pull/63481) ([vdimir](https://github.com/vdimir)).
+* Insert QueryFinish on AsyncInsertFlush with no data. [#63483](https://github.com/ClickHouse/ClickHouse/pull/63483) ([Raúl Marín](https://github.com/Algunenano)).
+* Fix `system.query_log.used_dictionaries` logging. [#63487](https://github.com/ClickHouse/ClickHouse/pull/63487) ([Eduard Karacharov](https://github.com/korowa)).
+* Support executing a function during assignment of a parameterized view value. [#63502](https://github.com/ClickHouse/ClickHouse/pull/63502) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)).
+* Avoid a segfault in `MergeTreePrefetchedReadPool` while fetching projection parts. [#63513](https://github.com/ClickHouse/ClickHouse/pull/63513) ([Antonio Andelic](https://github.com/antonio2368)).
+* Fix a RabbitMQ heap-use-after-free found by clang-18, which can happen if an error is thrown from RabbitMQ during initialization of exchanges and queues. [#63515](https://github.com/ClickHouse/ClickHouse/pull/63515) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Fix crash on exit with Sentry enabled (due to OpenSSL being destroyed before Sentry). [#63548](https://github.com/ClickHouse/ClickHouse/pull/63548) ([Azat Khuzhin](https://github.com/azat)).
+* Fixed Parquet memory tracking. [#63584](https://github.com/ClickHouse/ClickHouse/pull/63584) ([Michael Kolupaev](https://github.com/al13n321)).
+* Fix support for Array and Map with keyed hashing functions and materialized keys. [#63628](https://github.com/ClickHouse/ClickHouse/pull/63628) ([Salvatore Mesoraca](https://github.com/aiven-sal)).
+* Fixed Parquet filter pushdown not working with the Analyzer. [#63642](https://github.com/ClickHouse/ClickHouse/pull/63642) ([Michael Kolupaev](https://github.com/al13n321)).
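+
+A hypothetical repro sketch for the `GROUPING SETS` fix in [#63405](https://github.com/ClickHouse/ClickHouse/pull/63405) (this shows only the shape of the affected queries, not the exact query from the linked issue):
+
+```sql
+-- Queries of this shape could hit logical errors when a WHERE condition
+-- referenced a grouping key while group_by_use_nulls was enabled.
+SELECT number % 2 AS a, count()
+FROM numbers(10)
+WHERE a = 1
+GROUP BY GROUPING SETS ((a), ())
+SETTINGS group_by_use_nulls = 1;
+```
+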
+* It is forbidden to convert MergeTree to replicated if the zookeeper path for this table already exists. [#63670](https://github.com/ClickHouse/ClickHouse/pull/63670) ([Kirill](https://github.com/kirillgarbar)).
+* Read only the necessary columns from VIEW (new analyzer). Closes [#62594](https://github.com/ClickHouse/ClickHouse/issues/62594). [#63688](https://github.com/ClickHouse/ClickHouse/pull/63688) ([Maksim Kita](https://github.com/kitaisreal)).
+* Fix a rare case of missing data in the result of a distributed query. [#63691](https://github.com/ClickHouse/ClickHouse/pull/63691) ([vdimir](https://github.com/vdimir)).
+* Fix [#63539](https://github.com/ClickHouse/ClickHouse/issues/63539). Forbid WINDOW redefinition in the new analyzer. [#63694](https://github.com/ClickHouse/ClickHouse/pull/63694) ([Dmitry Novik](https://github.com/novikd)).
+* Fix `flatten_nested` being broken with the Replicated database engine. [#63695](https://github.com/ClickHouse/ClickHouse/pull/63695) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Fix the `SIZES_OF_COLUMNS_DOESNT_MATCH` error for queries with the `arrayJoin` function in `WHERE` (see the sketch below). Fixes [#63653](https://github.com/ClickHouse/ClickHouse/issues/63653). [#63722](https://github.com/ClickHouse/ClickHouse/pull/63722) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Fix `Not found column` and `CAST AS Map from array requires nested tuple of 2 elements` exceptions for distributed queries which use the `Map(Nothing, Nothing)` type. Fixes [#63637](https://github.com/ClickHouse/ClickHouse/issues/63637). [#63753](https://github.com/ClickHouse/ClickHouse/pull/63753) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Fix possible `ILLEGAL_COLUMN` error in `partial_merge` join, close [#37928](https://github.com/ClickHouse/ClickHouse/issues/37928). [#63755](https://github.com/ClickHouse/ClickHouse/pull/63755) ([vdimir](https://github.com/vdimir)).
+* Fix `query_plan_remove_redundant_distinct` breaking queries with window functions (when `allow_experimental_analyzer` is on). Fixes [#62820](https://github.com/ClickHouse/ClickHouse/issues/62820). [#63776](https://github.com/ClickHouse/ClickHouse/pull/63776) ([Igor Nikonov](https://github.com/devcrafter)).
+* Fix possible crash with SYSTEM UNLOAD PRIMARY KEY. [#63778](https://github.com/ClickHouse/ClickHouse/pull/63778) ([Raúl Marín](https://github.com/Algunenano)).
+* Fix a query with a duplicating cyclic alias. Fixes [#63320](https://github.com/ClickHouse/ClickHouse/issues/63320). [#63791](https://github.com/ClickHouse/ClickHouse/pull/63791) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Fixed performance degradation of parsing data formats in INSERT queries. This closes [#62918](https://github.com/ClickHouse/ClickHouse/issues/62918). This partially reverts [#42284](https://github.com/ClickHouse/ClickHouse/issues/42284), which breaks the original design and introduces more problems. [#63801](https://github.com/ClickHouse/ClickHouse/pull/63801) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Add the `endpoint_subpath` S3 URI setting to allow `plain_rewritable` disks to share the same endpoint. [#63806](https://github.com/ClickHouse/ClickHouse/pull/63806) ([Julia Kartseva](https://github.com/jkartseva)).
+* Fix queries using a parallel read buffer (e.g. with `max_download_threads` > 0) getting stuck when threads cannot be allocated. [#63814](https://github.com/ClickHouse/ClickHouse/pull/63814) ([Antonio Andelic](https://github.com/antonio2368)).
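+
+A minimal sketch of the query shape affected by the `arrayJoin`-in-`WHERE` fix from [#63722](https://github.com/ClickHouse/ClickHouse/pull/63722) (illustrative, not the query from the issue):
+
+```sql
+-- arrayJoin in WHERE expands each source row once per array element;
+-- queries of this shape could fail with SIZES_OF_COLUMNS_DOESNT_MATCH.
+SELECT number
+FROM numbers(3)
+WHERE arrayJoin([number, number + 1]) = 1;
+```
+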
+* Allow JOIN filter push down to both streams if only a single equivalent column is used in the query. Closes [#63799](https://github.com/ClickHouse/ClickHouse/issues/63799). [#63819](https://github.com/ClickHouse/ClickHouse/pull/63819) ([Maksim Kita](https://github.com/kitaisreal)).
+* Remove the data from all disks after DROP with the Lazy database engine. Without these changes, orphaned data would remain on the disks. [#63848](https://github.com/ClickHouse/ClickHouse/pull/63848) ([MikhailBurdukov](https://github.com/MikhailBurdukov)).
+* Fix an incorrect SELECT query result when parallel replicas were used to read from a materialized view. [#63861](https://github.com/ClickHouse/ClickHouse/pull/63861) ([Nikita Taranov](https://github.com/nickitat)).
+* Fixes in the `find_super_nodes` and `find_big_family` commands of keeper-client: do not fail on ZNONODE errors; find super nodes inside super nodes; properly calculate the subtree node count. [#63862](https://github.com/ClickHouse/ClickHouse/pull/63862) ([Alexander Gololobov](https://github.com/davenger)).
+* Fix a `Database name is empty` error for remote queries with lambdas over a cluster with a modified default database. Fixes [#63471](https://github.com/ClickHouse/ClickHouse/issues/63471). [#63864](https://github.com/ClickHouse/ClickHouse/pull/63864) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Fix SIGSEGV due to the CPU/Real (`query_profiler_real_time_period_ns`/`query_profiler_cpu_time_period_ns`) profiler (an issue since 2022 that led to periodic server crashes, especially if you were using the Distributed engine). [#63865](https://github.com/ClickHouse/ClickHouse/pull/63865) ([Azat Khuzhin](https://github.com/azat)).
+* Fixed the `EXPLAIN CURRENT TRANSACTION` query. [#63926](https://github.com/ClickHouse/ClickHouse/pull/63926) ([Anton Popov](https://github.com/CurtizJ)).
+* Fix analyzer: make the IN function with arbitrarily deep sub-selects in a materialized view use the insertion block. [#63930](https://github.com/ClickHouse/ClickHouse/pull/63930) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
+* Allow `ALTER TABLE .. MODIFY|RESET SETTING` and `ALTER TABLE .. MODIFY COMMENT` for the `plain_rewritable` disk. [#63933](https://github.com/ClickHouse/ClickHouse/pull/63933) ([Julia Kartseva](https://github.com/jkartseva)).
+* Fix Recursive CTE with distributed queries. Closes [#63790](https://github.com/ClickHouse/ClickHouse/issues/63790). [#63939](https://github.com/ClickHouse/ClickHouse/pull/63939) ([Maksim Kita](https://github.com/kitaisreal)).
+* Fixed reading of columns of type `Tuple(Map(LowCardinality(String), String), ...)`. [#63956](https://github.com/ClickHouse/ClickHouse/pull/63956) ([Anton Popov](https://github.com/CurtizJ)).
+* Fix resolution of the unqualified COLUMNS matcher. Preserve the input column order and forbid usage of unknown identifiers (see the sketch below). [#63962](https://github.com/ClickHouse/ClickHouse/pull/63962) ([Dmitry Novik](https://github.com/novikd)).
+* Fix the `Not found column` error for queries with `skip_unused_shards = 1`, `LIMIT BY`, and the new analyzer. Fixes [#63943](https://github.com/ClickHouse/ClickHouse/issues/63943). [#63983](https://github.com/ClickHouse/ClickHouse/pull/63983) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Resolve a client abort issue when using the KQL table function in interactive mode (low-quality third-party Kusto Query Language support). [#63992](https://github.com/ClickHouse/ClickHouse/pull/63992) ([Yong Wang](https://github.com/kashwy)).
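+
+A small sketch of the COLUMNS matcher behavior targeted by [#63962](https://github.com/ClickHouse/ClickHouse/pull/63962) (an illustrative example, assuming the new analyzer is enabled):
+
+```sql
+-- The unqualified COLUMNS matcher selects columns by regular expression.
+-- After the fix it preserves the input column order (c1, then c2),
+-- and unknown identifiers inside the matcher expression are rejected.
+SELECT COLUMNS('^c') FROM (SELECT 1 AS c1, 2 AS b, 3 AS c2);
+```
+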
+* Fix a `Cyclic aliases` error for cyclic aliases of different types (expression and function). [#63993](https://github.com/ClickHouse/ClickHouse/pull/63993) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Deserialize untrusted binary inputs in a safer way. [#64024](https://github.com/ClickHouse/ClickHouse/pull/64024) ([Robert Schulze](https://github.com/rschu1ze)).
+* Do not throw a `Storage doesn't support FINAL` error for remote queries over non-MergeTree tables with `final = true` and the new analyzer. Fixes [#63960](https://github.com/ClickHouse/ClickHouse/issues/63960). [#64037](https://github.com/ClickHouse/ClickHouse/pull/64037) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Add missing settings to recoverLostReplica. [#64040](https://github.com/ClickHouse/ClickHouse/pull/64040) ([Raúl Marín](https://github.com/Algunenano)).
+* Fix unwind on SIGSEGV on aarch64 (due to a small stack for the signal). [#64058](https://github.com/ClickHouse/ClickHouse/pull/64058) ([Azat Khuzhin](https://github.com/azat)).
+* Use a properly redefined context with the correct definer for each individual view in the query pipeline. [#64079](https://github.com/ClickHouse/ClickHouse/pull/64079) ([pufit](https://github.com/pufit)).
+* Fix the analyzer's `Not found column` error when using INTERPOLATE. [#64096](https://github.com/ClickHouse/ClickHouse/pull/64096) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
+* Fix Azure backup writing multipart blocks of 1 MB (the read buffer size) instead of `max_upload_part_size`. [#64117](https://github.com/ClickHouse/ClickHouse/pull/64117) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Fix creating backups to S3 buckets with credentials different from those of the disk containing the file. [#64153](https://github.com/ClickHouse/ClickHouse/pull/64153) ([Antonio Andelic](https://github.com/antonio2368)).
+* Prevent LOGICAL_ERROR on CREATE TABLE as MaterializedView. [#64174](https://github.com/ClickHouse/ClickHouse/pull/64174) ([Raúl Marín](https://github.com/Algunenano)).
+* The query cache now considers two identical queries against different databases as different. The previous behavior could be used to bypass missing privileges to read from a table. [#64199](https://github.com/ClickHouse/ClickHouse/pull/64199) ([Robert Schulze](https://github.com/rschu1ze)).
+* Fix possible abort on an uncaught exception in `~WriteBufferFromFileDescriptor` in StatusFile. [#64206](https://github.com/ClickHouse/ClickHouse/pull/64206) ([Kruglov Pavel](https://github.com/Avogar)).
+* Ignore `text_log` config when using Keeper. [#64218](https://github.com/ClickHouse/ClickHouse/pull/64218) ([Antonio Andelic](https://github.com/antonio2368)).
+* Fix the `duplicate alias` error for distributed queries with `ARRAY JOIN`. [#64226](https://github.com/ClickHouse/ClickHouse/pull/64226) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Fix unexpected `accurateCast` from string to integer. [#64255](https://github.com/ClickHouse/ClickHouse/pull/64255) ([wudidapaopao](https://github.com/wudidapaopao)).
+* Fixed CNF simplification in case any OR group contains mutually exclusive atoms. [#64256](https://github.com/ClickHouse/ClickHouse/pull/64256) ([Eduard Karacharov](https://github.com/korowa)).
+* Fix Query Tree size validation. [#64377](https://github.com/ClickHouse/ClickHouse/pull/64377) ([Dmitry Novik](https://github.com/novikd)).
+* Fix `Logical error: Bad cast` for the `Buffer` table with `PREWHERE`.
[#64388](https://github.com/ClickHouse/ClickHouse/pull/64388) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Prevent recursive logging in `blob_storage_log` when it's stored on object storage. [#64393](https://github.com/ClickHouse/ClickHouse/pull/64393) ([vdimir](https://github.com/vdimir)).
+* Fixed `CREATE TABLE AS` queries for tables with default expressions. [#64455](https://github.com/ClickHouse/ClickHouse/pull/64455) ([Anton Popov](https://github.com/CurtizJ)).
+* Fixed `optimize_read_in_order` behaviour for ORDER BY ... NULLS FIRST / LAST on tables with nullable keys. [#64483](https://github.com/ClickHouse/ClickHouse/pull/64483) ([Eduard Karacharov](https://github.com/korowa)).
+* Fix the `Expression nodes list expected 1 projection names` and `Unknown expression or identifier` errors for queries with aliases to `GLOBAL IN`. [#64517](https://github.com/ClickHouse/ClickHouse/pull/64517) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Fix a `Cannot find column` error in distributed queries with a constant CTE in the `GROUP BY` key. [#64519](https://github.com/ClickHouse/ClickHouse/pull/64519) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Fixed ORC statistics calculation, when writing, for unsigned types on all platforms and Int8 on ARM. [#64563](https://github.com/ClickHouse/ClickHouse/pull/64563) ([Michael Kolupaev](https://github.com/al13n321)).
+* Fix the crash loop when restoring from a backup is blocked by creating an MV with a definer that hasn't been restored yet. [#64595](https://github.com/ClickHouse/ClickHouse/pull/64595) ([pufit](https://github.com/pufit)).
+* Fix the output of function `formatDateTimeInJodaSyntax` when a formatter generates an odd number of characters and the last character is `0`. For example, `SELECT formatDateTimeInJodaSyntax(toDate('2012-05-29'), 'D')` now correctly returns `150` instead of the previous `15` (shown as a runnable snippet below). [#64614](https://github.com/ClickHouse/ClickHouse/pull/64614) ([LiuNeng](https://github.com/liuneng1994)).
+* Do not rewrite aggregation if the `-If` combinator is already used. [#64638](https://github.com/ClickHouse/ClickHouse/pull/64638) ([Dmitry Novik](https://github.com/novikd)).
+* Fix type inference for float (in case of a small buffer, i.e. `--max_read_buffer_size 1`). [#64641](https://github.com/ClickHouse/ClickHouse/pull/64641) ([Azat Khuzhin](https://github.com/azat)).
+* Fix a bug which could lead to non-working TTLs with expressions. [#64694](https://github.com/ClickHouse/ClickHouse/pull/64694) ([alesapin](https://github.com/alesapin)).
+* Fix removing the `WHERE` and `PREWHERE` expressions, which are always true (for the new analyzer). [#64695](https://github.com/ClickHouse/ClickHouse/pull/64695) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Fixed excessive part elimination by token-based text indexes (`ngrambf`, `full_text`) when filtering by the result of `startsWith`, `endsWith`, `match`, `multiSearchAny`. [#64720](https://github.com/ClickHouse/ClickHouse/pull/64720) ([Eduard Karacharov](https://github.com/korowa)).
+* Fixes incorrect behaviour of ANSI CSI escaping in the `UTF8::computeWidth` function. [#64756](https://github.com/ClickHouse/ClickHouse/pull/64756) ([Shaun Struwig](https://github.com/Blargian)).
+* Fix a case of incorrect removal of `ORDER BY` / `LIMIT BY` across subqueries. [#64766](https://github.com/ClickHouse/ClickHouse/pull/64766) ([Raúl Marín](https://github.com/Algunenano)).
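+
+The `formatDateTimeInJodaSyntax` fix from [#64614](https://github.com/ClickHouse/ClickHouse/pull/64614) as a runnable snippet (taken directly from the entry above; `D` is Joda's day-of-year formatter):
+
+```sql
+-- Previously the odd-length padding bug truncated the trailing '0'.
+SELECT formatDateTimeInJodaSyntax(toDate('2012-05-29'), 'D'); -- now '150', previously '15'
+```
+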
+* Fix (experimental) unequal JOIN with subqueries for sets which are in mixed join conditions. [#64775](https://github.com/ClickHouse/ClickHouse/pull/64775) ([lgbo](https://github.com/lgbo-ustc)).
+* Fix crash in a local cache over a `plain_rewritable` disk. [#64778](https://github.com/ClickHouse/ClickHouse/pull/64778) ([Julia Kartseva](https://github.com/jkartseva)).
+* Keeper fix: return the correct value for `zk_latest_snapshot_size` in the `mntr` command. [#64784](https://github.com/ClickHouse/ClickHouse/pull/64784) ([Antonio Andelic](https://github.com/antonio2368)).
+* Fix `Cannot find column` in a distributed query with `ARRAY JOIN` by a `Nested` column. Fixes [#64755](https://github.com/ClickHouse/ClickHouse/issues/64755). [#64801](https://github.com/ClickHouse/ClickHouse/pull/64801) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Fix a memory leak in the SLRU cache policy. [#64803](https://github.com/ClickHouse/ClickHouse/pull/64803) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Fixed possible incorrect memory tracking in several kinds of queries: queries that read any data from S3, queries via the HTTP protocol, asynchronous inserts. [#64844](https://github.com/ClickHouse/ClickHouse/pull/64844) ([Anton Popov](https://github.com/CurtizJ)).
+* Fix the `Block structure mismatch` error for queries reading with `PREWHERE` from a materialized view when the materialized view has columns of different types than the source table. Fixes [#64611](https://github.com/ClickHouse/ClickHouse/issues/64611). [#64855](https://github.com/ClickHouse/ClickHouse/pull/64855) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Fix a rare crash when a table has a TTL with a subquery + database replicated + parallel replicas + analyzer. It's really rare, but please don't use TTLs with subqueries. [#64858](https://github.com/ClickHouse/ClickHouse/pull/64858) ([alesapin](https://github.com/alesapin)).
+* Fix duplicating `Delete` events in `blob_storage_log` in case of a large batch to delete. [#64924](https://github.com/ClickHouse/ClickHouse/pull/64924) ([vdimir](https://github.com/vdimir)).
+* Backported in [#65544](https://github.com/ClickHouse/ClickHouse/issues/65544): Fix crash for `ALTER TABLE ... ON CLUSTER ... MODIFY SQL SECURITY`. [#64957](https://github.com/ClickHouse/ClickHouse/pull/64957) ([pufit](https://github.com/pufit)).
+* Fixed a `Session moved to another server` error from [Zoo]Keeper that might happen after server startup when the config has includes from [Zoo]Keeper. [#64986](https://github.com/ClickHouse/ClickHouse/pull/64986) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* Backported in [#65582](https://github.com/ClickHouse/ClickHouse/issues/65582): Fix crash on destroying AccessControl: add explicit shutdown. [#64993](https://github.com/ClickHouse/ClickHouse/pull/64993) ([Vitaly Baranov](https://github.com/vitlibar)).
+* Fix the `ALTER MODIFY COMMENT` query that was broken for parameterized VIEWs in https://github.com/ClickHouse/ClickHouse/pull/54211. [#65031](https://github.com/ClickHouse/ClickHouse/pull/65031) ([Nikolay Degterinsky](https://github.com/evillique)).
+* Fix `host_id` in DatabaseReplicated when the `cluster_secure_connection` parameter is enabled. Previously all the connections within the cluster created by DatabaseReplicated were not secure, even if the parameter was enabled. [#65054](https://github.com/ClickHouse/ClickHouse/pull/65054) ([Nikolay Degterinsky](https://github.com/evillique)).
+* Fix the `Not-ready Set` error after the `PREWHERE` optimization for StorageMerge. [#65057](https://github.com/ClickHouse/ClickHouse/pull/65057) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Avoid writing to a finalized buffer in File-like storages. [#65063](https://github.com/ClickHouse/ClickHouse/pull/65063) ([Kruglov Pavel](https://github.com/Avogar)).
+* Fix possible infinite query duration in case of cyclic aliases. Fixes [#64849](https://github.com/ClickHouse/ClickHouse/issues/64849). [#65081](https://github.com/ClickHouse/ClickHouse/pull/65081) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Fix the `Unknown expression identifier` error for remote queries with `INTERPOLATE (alias)` (new analyzer). Fixes [#64636](https://github.com/ClickHouse/ClickHouse/issues/64636). [#65090](https://github.com/ClickHouse/ClickHouse/pull/65090) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Fix pushing arithmetic operations out of aggregation. In the new analyzer, the optimization was applied only once. [#65104](https://github.com/ClickHouse/ClickHouse/pull/65104) ([Dmitry Novik](https://github.com/novikd)).
+* Fix aggregate function name rewriting in the new analyzer. [#65110](https://github.com/ClickHouse/ClickHouse/pull/65110) ([Dmitry Novik](https://github.com/novikd)).
+* Respond with 5xx instead of 200 OK in case of a receive timeout while reading (parts of) the request body from the client socket. [#65118](https://github.com/ClickHouse/ClickHouse/pull/65118) ([Julian Maicher](https://github.com/jmaicher)).
+* Backported in [#65734](https://github.com/ClickHouse/ClickHouse/issues/65734): Eliminate injective functions in the arguments of `uniq*` functions recursively. This used to work correctly but was broken in the new analyzer. [#65140](https://github.com/ClickHouse/ClickHouse/pull/65140) ([Duc Canh Le](https://github.com/canhld94)).
+* Fix possible crash for hedged requests. [#65206](https://github.com/ClickHouse/ClickHouse/pull/65206) ([Azat Khuzhin](https://github.com/azat)).
+* Fix a bug in Hashed and Hashed_Array dictionary short-circuit evaluation, which may read an uninitialized number, leading to various errors. [#65256](https://github.com/ClickHouse/ClickHouse/pull/65256) ([jsc0218](https://github.com/jsc0218)).
+* Ensure that the type of the constant (the IN operator's second parameter) is always visible during the IN operator's type conversion process. Otherwise, losing type information may cause some conversions to fail, such as the conversion from DateTime to Date. Fixes [#64487](https://github.com/ClickHouse/ClickHouse/issues/64487). [#65315](https://github.com/ClickHouse/ClickHouse/pull/65315) ([pn](https://github.com/chloro-pn)).
+* Backported in [#65665](https://github.com/ClickHouse/ClickHouse/issues/65665): Disable the `non-intersecting-parts` optimization for queries with `FINAL` in case the `read-in-order` optimization was enabled. This could lead to an incorrect query result. As a workaround, disable `do_not_merge_across_partitions_select_final` and `split_parts_ranges_into_intersecting_and_non_intersecting_final` before this fix is merged. [#65505](https://github.com/ClickHouse/ClickHouse/pull/65505) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Backported in [#65606](https://github.com/ClickHouse/ClickHouse/issues/65606): Fix getting the `Index out of bound for blob metadata` exception in case all files from a list batch were filtered out.
[#65523](https://github.com/ClickHouse/ClickHouse/pull/65523) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Backported in [#65790](https://github.com/ClickHouse/ClickHouse/issues/65790): Fixed a bug in MergeJoin: a column in sparse serialisation might be treated as a column of its nested type even though the required conversion wasn't performed. [#65632](https://github.com/ClickHouse/ClickHouse/pull/65632) ([Nikita Taranov](https://github.com/nickitat)).
+* Backported in [#65814](https://github.com/ClickHouse/ClickHouse/issues/65814): Fix invalid exceptions in function `parseDateTime` with the `%F` and `%D` placeholders (see the sketch below). [#65768](https://github.com/ClickHouse/ClickHouse/pull/65768) ([Antonio Andelic](https://github.com/antonio2368)).
+* Backported in [#65830](https://github.com/ClickHouse/ClickHouse/issues/65830): Fix a bug in short-circuit logic when the old analyzer and `dictGetOrDefault` are used. [#65802](https://github.com/ClickHouse/ClickHouse/pull/65802) ([jsc0218](https://github.com/jsc0218)).
+
+#### Build/Testing/Packaging Improvement
+* ClickHouse is built with clang-18. A lot of new checks from clang-tidy-18 have been enabled. [#60469](https://github.com/ClickHouse/ClickHouse/pull/60469) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Make the `network` service required when using the rc init script to start the ClickHouse server daemon. [#60650](https://github.com/ClickHouse/ClickHouse/pull/60650) ([Chun-Sheng, Li](https://github.com/peter279k)).
+* Re-enable the broken s390x build in CI. [#63135](https://github.com/ClickHouse/ClickHouse/pull/63135) ([Harry Lee](https://github.com/HarryLeeIBM)).
+* The Dockerfile is reviewed by the Docker official library in https://github.com/docker-library/official-images/pull/15846. [#63400](https://github.com/ClickHouse/ClickHouse/pull/63400) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
+* Information about every symbol in every translation unit will be collected in the CI database for every CI build. This closes [#63494](https://github.com/ClickHouse/ClickHouse/issues/63494). [#63495](https://github.com/ClickHouse/ClickHouse/pull/63495) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Experimentally support loongarch64 as a new platform for ClickHouse. [#63733](https://github.com/ClickHouse/ClickHouse/pull/63733) ([qiangxuhui](https://github.com/qiangxuhui)).
+* Update the Apache DataSketches library. It resolves [#63858](https://github.com/ClickHouse/ClickHouse/issues/63858). [#63923](https://github.com/ClickHouse/ClickHouse/pull/63923) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Enable gRPC support for aarch64 Linux while cross-compiling the binary. [#64072](https://github.com/ClickHouse/ClickHouse/pull/64072) ([alesapin](https://github.com/alesapin)).
+* Fix a typo in test_hdfsCluster_unset_skip_unavailable_shards. The test writes data to unskip_unavailable_shards, but uses skip_unavailable_shards from the previous test. [#64243](https://github.com/ClickHouse/ClickHouse/pull/64243) ([Mikhail Artemenko](https://github.com/Michicosun)).
+* Reduce the size of some slow tests. [#64387](https://github.com/ClickHouse/ClickHouse/pull/64387) ([Raúl Marín](https://github.com/Algunenano)).
+* Reduce the size of some slow tests. [#64452](https://github.com/ClickHouse/ClickHouse/pull/64452) ([Raúl Marín](https://github.com/Algunenano)).
+* Fix test_lost_part_other_replica. [#64512](https://github.com/ClickHouse/ClickHouse/pull/64512) ([Raúl Marín](https://github.com/Algunenano)).
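+
+A hedged sketch of the `parseDateTime` placeholder fix from [#65768](https://github.com/ClickHouse/ClickHouse/pull/65768) noted earlier in the bug-fix list (the example values are illustrative; `%F` and `%D` follow the MySQL-style definitions `%Y-%m-%d` and `%m/%d/%y`):
+
+```sql
+-- Both placeholders previously raised invalid exceptions.
+SELECT parseDateTime('2024-06-01', '%F');
+SELECT parseDateTime('06/01/24', '%D');
+```
+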
+* Add tests for experimental unequal joins and randomize new settings in clickhouse-test. [#64535](https://github.com/ClickHouse/ClickHouse/pull/64535) ([Nikita Fomichev](https://github.com/fm4v)).
+* Upgrade tests: Update config and work with release candidates. [#64542](https://github.com/ClickHouse/ClickHouse/pull/64542) ([Raúl Marín](https://github.com/Algunenano)).
+* Add support for LLVM XRay. [#64592](https://github.com/ClickHouse/ClickHouse/pull/64592) ([Tomer Shafir](https://github.com/tomershafir)).
+* Speed up 02995_forget_partition. [#64761](https://github.com/ClickHouse/ClickHouse/pull/64761) ([Raúl Marín](https://github.com/Algunenano)).
+* Fix 02790_async_queries_in_query_log. [#64764](https://github.com/ClickHouse/ClickHouse/pull/64764) ([Raúl Marín](https://github.com/Algunenano)).
+* Support LLVM XRay on Linux amd64 only. [#64837](https://github.com/ClickHouse/ClickHouse/pull/64837) ([Tomer Shafir](https://github.com/tomershafir)).
+* Get rid of custom code in `tests/ci/download_release_packages.py` and `tests/ci/get_previous_release_tag.py` to avoid issues after https://github.com/ClickHouse/ClickHouse/pull/64759 is merged. [#64848](https://github.com/ClickHouse/ClickHouse/pull/64848) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
+* Decrease the size of the `unit-test` image a few times. [#65102](https://github.com/ClickHouse/ClickHouse/pull/65102) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
+
+#### NO CL CATEGORY
+
+* Backported in [#65568](https://github.com/ClickHouse/ClickHouse/issues/65568):. [#65498](https://github.com/ClickHouse/ClickHouse/pull/65498) ([Sergei Trifonov](https://github.com/serxa)).
+* Backported in [#65693](https://github.com/ClickHouse/ClickHouse/issues/65693):. [#65686](https://github.com/ClickHouse/ClickHouse/pull/65686) ([Raúl Marín](https://github.com/Algunenano)).
+
+#### NO CL ENTRY
+
+* NO CL ENTRY: 'Revert "Do not remove server constants from GROUP BY key for secondary query."'. [#63297](https://github.com/ClickHouse/ClickHouse/pull/63297) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* NO CL ENTRY: 'Revert "Introduce bulk loading to StorageEmbeddedRocksDB"'. [#63316](https://github.com/ClickHouse/ClickHouse/pull/63316) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* NO CL ENTRY: 'Revert "Revert "Do not remove server constants from GROUP BY key for secondary query.""'. [#63415](https://github.com/ClickHouse/ClickHouse/pull/63415) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* NO CL ENTRY: 'Revert "Fix index analysis for `DateTime64`"'. [#63525](https://github.com/ClickHouse/ClickHouse/pull/63525) ([Raúl Marín](https://github.com/Algunenano)).
+* NO CL ENTRY: 'Revert "Update gui.md - Add ch-ui to open-source available tools."'. [#64064](https://github.com/ClickHouse/ClickHouse/pull/64064) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* NO CL ENTRY: 'Revert "Prevent conversion to Replicated if zookeeper path already exists"'. [#64214](https://github.com/ClickHouse/ClickHouse/pull/64214) ([Sergei Trifonov](https://github.com/serxa)).
+* NO CL ENTRY: 'Revert "Refactoring of Server.h: Isolate server management from other logic"'. [#64425](https://github.com/ClickHouse/ClickHouse/pull/64425) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* NO CL ENTRY: 'Revert "Remove some unnecessary `UNREACHABLE`s"'. [#64430](https://github.com/ClickHouse/ClickHouse/pull/64430) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* NO CL ENTRY: 'Revert "CI: fix build_report selection in case of job reuse"'. [#64516](https://github.com/ClickHouse/ClickHouse/pull/64516) ([Max K.](https://github.com/maxknv)).
+* NO CL ENTRY: 'Revert "Revert "CI: fix build_report selection in case of job reuse""'. [#64531](https://github.com/ClickHouse/ClickHouse/pull/64531) ([Max K.](https://github.com/maxknv)).
+* NO CL ENTRY: 'Revert "Add `fromReadableSize` function"'. [#64616](https://github.com/ClickHouse/ClickHouse/pull/64616) ([Robert Schulze](https://github.com/rschu1ze)).
+* NO CL ENTRY: 'Update CHANGELOG.md'. [#64816](https://github.com/ClickHouse/ClickHouse/pull/64816) ([Paweł Kudzia](https://github.com/pakud)).
+* NO CL ENTRY: 'Revert "Reduce lock contention for MergeTree tables (by renaming parts without holding lock)"'. [#64899](https://github.com/ClickHouse/ClickHouse/pull/64899) ([alesapin](https://github.com/alesapin)).
+* NO CL ENTRY: 'Revert "Add dynamic untracked memory limits for more precise memory tracking"'. [#64969](https://github.com/ClickHouse/ClickHouse/pull/64969) ([Sergei Trifonov](https://github.com/serxa)).
+* NO CL ENTRY: 'Revert "Fix duplicating Delete events in blob_storage_log"'. [#65049](https://github.com/ClickHouse/ClickHouse/pull/65049) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* NO CL ENTRY: 'Revert "Revert "Fix duplicating Delete events in blob_storage_log""'. [#65053](https://github.com/ClickHouse/ClickHouse/pull/65053) ([vdimir](https://github.com/vdimir)).
+* NO CL ENTRY: 'Revert "S3: reduce retires time for queries, increase retries count for backups"'. [#65148](https://github.com/ClickHouse/ClickHouse/pull/65148) ([Raúl Marín](https://github.com/Algunenano)).
+* NO CL ENTRY: 'Revert "Small fix for 02340_parts_refcnt_mergetree"'. [#65149](https://github.com/ClickHouse/ClickHouse/pull/65149) ([Raúl Marín](https://github.com/Algunenano)).
+* NO CL ENTRY: 'Revert "Change default s3_throw_on_zero_files_match to true, document that presigned S3 URLs are not supported"'. [#65250](https://github.com/ClickHouse/ClickHouse/pull/65250) ([Max K.](https://github.com/maxknv)).
+* NO CL ENTRY: 'Revert "Fix AWS ECS"'. [#65361](https://github.com/ClickHouse/ClickHouse/pull/65361) ([Alexander Tokmakov](https://github.com/tavplubix)).
+
+#### NOT FOR CHANGELOG / INSIGNIFICANT
+
+* Try abort on current thread join. [#42544](https://github.com/ClickHouse/ClickHouse/pull/42544) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* This change was reverted. [#51008](https://github.com/ClickHouse/ClickHouse/pull/51008) ([Michael Kolupaev](https://github.com/al13n321)).
+* Analyzer fuzzer 2. [#57098](https://github.com/ClickHouse/ClickHouse/pull/57098) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Analyzer fuzzer 4. [#57101](https://github.com/ClickHouse/ClickHouse/pull/57101) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Check python code with flake8. [#58349](https://github.com/ClickHouse/ClickHouse/pull/58349) ([Azat Khuzhin](https://github.com/azat)).
+* Unite s3/hdfs/azure storage implementations into a single class working with IObjectStorage. Same for *Cluster, data lakes and Queue storages. [#59767](https://github.com/ClickHouse/ClickHouse/pull/59767) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Remove http_max_chunk_size setting (too internal). [#60852](https://github.com/ClickHouse/ClickHouse/pull/60852) ([Azat Khuzhin](https://github.com/azat)).
+* Fix race in refreshable materialized views causing SELECT to fail sometimes.
[#60883](https://github.com/ClickHouse/ClickHouse/pull/60883) ([Michael Kolupaev](https://github.com/al13n321)).
+* Refactor KeyCondition and key analysis to improve PartitionPruner and the trivial count optimization. This is separated from [#60463](https://github.com/ClickHouse/ClickHouse/issues/60463). [#61459](https://github.com/ClickHouse/ClickHouse/pull/61459) ([Amos Bird](https://github.com/amosbird)).
+* Implement cumulative A Sync status. [#61464](https://github.com/ClickHouse/ClickHouse/pull/61464) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
+* Parallel replicas: table check failover. [#61935](https://github.com/ClickHouse/ClickHouse/pull/61935) ([Igor Nikonov](https://github.com/devcrafter)).
+* This change was reverted. [#61973](https://github.com/ClickHouse/ClickHouse/pull/61973) ([Azat Khuzhin](https://github.com/azat)).
+* Avoid crashing on column type mismatch in a few dozen places. [#62087](https://github.com/ClickHouse/ClickHouse/pull/62087) ([Michael Kolupaev](https://github.com/al13n321)).
+* Fix optimize_if_chain_to_multiif const NULL handling. [#62104](https://github.com/ClickHouse/ClickHouse/pull/62104) ([Michael Kolupaev](https://github.com/al13n321)).
+* Use intrusive lists for `ResourceRequest` instead of deque. [#62165](https://github.com/ClickHouse/ClickHouse/pull/62165) ([Sergei Trifonov](https://github.com/serxa)).
+* Analyzer: Fix validateAggregates for tables with different aliases. [#62346](https://github.com/ClickHouse/ClickHouse/pull/62346) ([vdimir](https://github.com/vdimir)).
+* Improve code and tests of `DROP` of multiple tables. [#62359](https://github.com/ClickHouse/ClickHouse/pull/62359) ([zhongyuankai](https://github.com/zhongyuankai)).
+* Fix the exception message during writing to a partitioned s3/hdfs/azure path with globs. [#62423](https://github.com/ClickHouse/ClickHouse/pull/62423) ([Kruglov Pavel](https://github.com/Avogar)).
+* Support UBSan on Clang-19 (master). [#62466](https://github.com/ClickHouse/ClickHouse/pull/62466) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Replay ZK logs using keeper-bench. [#62481](https://github.com/ClickHouse/ClickHouse/pull/62481) ([Antonio Andelic](https://github.com/antonio2368)).
+* Save the stacktrace of a thread waiting on a failing AsyncLoader job. [#62719](https://github.com/ClickHouse/ClickHouse/pull/62719) ([Sergei Trifonov](https://github.com/serxa)).
+* group_by_use_nulls strikes back. [#62922](https://github.com/ClickHouse/ClickHouse/pull/62922) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Analyzer: prefer the column name to an alias from array join. [#62995](https://github.com/ClickHouse/ClickHouse/pull/62995) ([vdimir](https://github.com/vdimir)).
+* CI: try to separate the workflows file for GitHub's Merge Queue. [#63123](https://github.com/ClickHouse/ClickHouse/pull/63123) ([Max K.](https://github.com/maxknv)).
+* Try to fix coverage tests. [#63130](https://github.com/ClickHouse/ClickHouse/pull/63130) ([Raúl Marín](https://github.com/Algunenano)).
+* Fix a flaky Azure backup test. [#63158](https://github.com/ClickHouse/ClickHouse/pull/63158) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)).
+* Merging [#60920](https://github.com/ClickHouse/ClickHouse/issues/60920). [#63159](https://github.com/ClickHouse/ClickHouse/pull/63159) ([vdimir](https://github.com/vdimir)).
+* QueryAnalysisPass: improve QUALIFY validation. [#63162](https://github.com/ClickHouse/ClickHouse/pull/63162) ([Maksim Kita](https://github.com/kitaisreal)).
+* Add numpy tests for different endianness.
[#63189](https://github.com/ClickHouse/ClickHouse/pull/63189) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
+* Clean the `_work` directory between the runner's launches. Fall back to the auto-update actions runner if it fails to start. Make `init-network.sh` sourceable and executable. [#63195](https://github.com/ClickHouse/ClickHouse/pull/63195) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
+* Add the ability to run Azure tests in a PR with a label. [#63196](https://github.com/ClickHouse/ClickHouse/pull/63196) ([alesapin](https://github.com/alesapin)).
+* Fix a possible endless loop while reading from Azure. [#63197](https://github.com/ClickHouse/ClickHouse/pull/63197) ([Anton Popov](https://github.com/CurtizJ)).
+* Add information about the materialized view security bug fix into the changelog. [#63204](https://github.com/ClickHouse/ClickHouse/pull/63204) ([pufit](https://github.com/pufit)).
+* Disable one test from 02994_sanity_check_settings. [#63208](https://github.com/ClickHouse/ClickHouse/pull/63208) ([Raúl Marín](https://github.com/Algunenano)).
+* Enable the custom parquet encoder by default, attempt 2. [#63210](https://github.com/ClickHouse/ClickHouse/pull/63210) ([Michael Kolupaev](https://github.com/al13n321)).
+* Update version after release. [#63215](https://github.com/ClickHouse/ClickHouse/pull/63215) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Update version_date.tsv and changelogs after v24.4.1.2088-stable. [#63217](https://github.com/ClickHouse/ClickHouse/pull/63217) ([robot-clickhouse](https://github.com/robot-clickhouse)).
+* Update version_date.tsv and changelogs after v24.3.3.102-lts. [#63226](https://github.com/ClickHouse/ClickHouse/pull/63226) ([robot-clickhouse](https://github.com/robot-clickhouse)).
+* Update version_date.tsv and changelogs after v24.2.3.70-stable. [#63227](https://github.com/ClickHouse/ClickHouse/pull/63227) ([robot-clickhouse](https://github.com/robot-clickhouse)).
+* Return back [#61551](https://github.com/ClickHouse/ClickHouse/issues/61551) (More optimal loading of marks). [#63233](https://github.com/ClickHouse/ClickHouse/pull/63233) ([Anton Popov](https://github.com/CurtizJ)).
+* Hide CI options under a spoiler. [#63237](https://github.com/ClickHouse/ClickHouse/pull/63237) ([Konstantin Bogdanov](https://github.com/thevar1able)).
+* Add an azure run with msan. [#63238](https://github.com/ClickHouse/ClickHouse/pull/63238) ([alesapin](https://github.com/alesapin)).
+* Now the syntax for this command is the following: `TRUNCATE ALL TABLES FROM [IF EXISTS] <database>`. [#63241](https://github.com/ClickHouse/ClickHouse/pull/63241) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
+* Minor follow-up to a renaming PR. [#63260](https://github.com/ClickHouse/ClickHouse/pull/63260) ([Robert Schulze](https://github.com/rschu1ze)).
+* Follow-up for [#62613](https://github.com/ClickHouse/ClickHouse/issues/62613): adding back checks similar to these: https://github.com/ClickHouse/ClickHouse/pull/62613/files#diff-70859078da57ecdfc66d26f732c0d7718d269e82bdc80e62b39f5ffeab36c05bL99 https://github.com/ClickHouse/ClickHouse/pull/62613/files#diff-70859078da57ecdfc66d26f732c0d7718d269e82bdc80e62b39f5ffeab36c05bL144-L149. [#63274](https://github.com/ClickHouse/ClickHouse/pull/63274) ([Alexander Gololobov](https://github.com/davenger)).
+* This setting was added in 24.5, not 24.4. [#63278](https://github.com/ClickHouse/ClickHouse/pull/63278) ([Raúl Marín](https://github.com/Algunenano)).
+* Improve cloud backport script.
[#63282](https://github.com/ClickHouse/ClickHouse/pull/63282) ([Raúl Marín](https://github.com/Algunenano)).
+* Update version_date.tsv and changelogs after v23.8.14.6-lts. [#63285](https://github.com/ClickHouse/ClickHouse/pull/63285) ([robot-clickhouse](https://github.com/robot-clickhouse)).
+* Fix a flaky Azure test. [#63286](https://github.com/ClickHouse/ClickHouse/pull/63286) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)).
+* Fix a deadlock in `CacheDictionaryUpdateQueue` in case of an exception in the constructor. [#63287](https://github.com/ClickHouse/ClickHouse/pull/63287) ([Nikita Taranov](https://github.com/nickitat)).
+* DiskApp: fix 'list --recursive /' and a crash on invalid arguments. [#63296](https://github.com/ClickHouse/ClickHouse/pull/63296) ([Michael Kolupaev](https://github.com/al13n321)).
+* Fix termination because of an unhandled exception in `MergeTreeDeduplicationLog::shutdown`. [#63298](https://github.com/ClickHouse/ClickHouse/pull/63298) ([Nikita Taranov](https://github.com/nickitat)).
+* Move the s3_plain_rewritable unit test to shell. [#63317](https://github.com/ClickHouse/ClickHouse/pull/63317) ([Julia Kartseva](https://github.com/jkartseva)).
+* Add tests for [#63264](https://github.com/ClickHouse/ClickHouse/issues/63264). [#63321](https://github.com/ClickHouse/ClickHouse/pull/63321) ([Raúl Marín](https://github.com/Algunenano)).
+* Try to fix a segfault in `MergeTreeReadPoolBase::createTask`. [#63323](https://github.com/ClickHouse/ClickHouse/pull/63323) ([Antonio Andelic](https://github.com/antonio2368)).
+* Reduce time-to-insert of profiling data in case of logs cluster issues. [#63325](https://github.com/ClickHouse/ClickHouse/pull/63325) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
+* Update README.md. [#63326](https://github.com/ClickHouse/ClickHouse/pull/63326) ([Tyler Hannan](https://github.com/tylerhannan)).
+* This should fix failures with errors like `Permission denied ["/var/lib/clickhouse/disks/s3/store/364/3643ff83-0996-4a4a-a90b-a96e66a10c74"]` when the table directory was chmod-ed by DatabaseCatalog. [#63330](https://github.com/ClickHouse/ClickHouse/pull/63330) ([Alexander Gololobov](https://github.com/davenger)).
+* Use `/commit/` to have the URLs in [reports](https://play.clickhouse.com/play?user=play#c2VsZWN0IGRpc3RpbmN0IGNvbW1pdF91cmwgZnJvbSBjaGVja3Mgd2hlcmUgY2hlY2tfc3RhcnRfdGltZSA+PSBub3coKSAtIGludGVydmFsIDEgbW9udGggYW5kIHB1bGxfcmVxdWVzdF9udW1iZXI9NjA1MzI=) like https://github.com/ClickHouse/ClickHouse/commit/44f8bc5308b53797bec8cccc3bd29fab8a00235d and not like https://github.com/ClickHouse/ClickHouse/commits/44f8bc5308b53797bec8cccc3bd29fab8a00235d. [#63331](https://github.com/ClickHouse/ClickHouse/pull/63331) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
+* Add a test for [#56287](https://github.com/ClickHouse/ClickHouse/issues/56287). [#63340](https://github.com/ClickHouse/ClickHouse/pull/63340) ([Raúl Marín](https://github.com/Algunenano)).
+* Update README.md. [#63350](https://github.com/ClickHouse/ClickHouse/pull/63350) ([Tyler Hannan](https://github.com/tylerhannan)).
+* Add a test for [#48049](https://github.com/ClickHouse/ClickHouse/issues/48049). [#63351](https://github.com/ClickHouse/ClickHouse/pull/63351) ([Raúl Marín](https://github.com/Algunenano)).
+* Add option `query_id_prefix` to `clickhouse-benchmark`. [#63352](https://github.com/ClickHouse/ClickHouse/pull/63352) ([Anton Popov](https://github.com/CurtizJ)).
+* New azurite version is fantastic (at least with Ubuntu 22.04.4 LTS): `azurite --version` fails with `/usr/local/lib/node_modules/azurite/dist/src/common/persistence/MemoryExtentStore.js:53 return this._chunks.get(categoryName)?.chunks.get(id); ^`. [#63354](https://github.com/ClickHouse/ClickHouse/pull/63354) ([alesapin](https://github.com/alesapin)).
+* Randomize the setting `enable_block_offset_column` in stress tests. [#63355](https://github.com/ClickHouse/ClickHouse/pull/63355) ([Anton Popov](https://github.com/CurtizJ)).
+* Fix AST parsing of invalid type names. [#63357](https://github.com/ClickHouse/ClickHouse/pull/63357) ([Michael Kolupaev](https://github.com/al13n321)).
+* Fix some 00002_log_and_exception_messages_formatting flakiness. [#63358](https://github.com/ClickHouse/ClickHouse/pull/63358) ([Michael Kolupaev](https://github.com/al13n321)).
+* Add tags for the test 03000_traverse_shadow_system_data_paths.sql to make it stable. [#63366](https://github.com/ClickHouse/ClickHouse/pull/63366) ([Aleksei Filatov](https://github.com/aalexfvk)).
+* Add a test for [#55655](https://github.com/ClickHouse/ClickHouse/issues/55655). [#63380](https://github.com/ClickHouse/ClickHouse/pull/63380) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Fix a data race in `reportBrokenPart`. [#63396](https://github.com/ClickHouse/ClickHouse/pull/63396) ([Antonio Andelic](https://github.com/antonio2368)).
+* Workaround for the `oklch()` inside canvas bug for Firefox. [#63404](https://github.com/ClickHouse/ClickHouse/pull/63404) ([Sergei Trifonov](https://github.com/serxa)).
+* Add a test for issue [#47862](https://github.com/ClickHouse/ClickHouse/issues/47862). [#63424](https://github.com/ClickHouse/ClickHouse/pull/63424) ([Robert Schulze](https://github.com/rschu1ze)).
+* Fix parsing of the `CREATE INDEX` query. [#63425](https://github.com/ClickHouse/ClickHouse/pull/63425) ([Anton Popov](https://github.com/CurtizJ)).
+* We are using Shared Catalog in the CI Logs cluster. [#63442](https://github.com/ClickHouse/ClickHouse/pull/63442) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Fix collection of coverage data in the CI Logs cluster. [#63453](https://github.com/ClickHouse/ClickHouse/pull/63453) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Fix a flaky test for the rocksdb bulk sink. [#63457](https://github.com/ClickHouse/ClickHouse/pull/63457) ([Duc Canh Le](https://github.com/canhld94)).
+* Extra constraints for stress and fuzzer tests. [#63470](https://github.com/ClickHouse/ClickHouse/pull/63470) ([Raúl Marín](https://github.com/Algunenano)).
+* io_uring: refactor getting the reader from the context. [#63475](https://github.com/ClickHouse/ClickHouse/pull/63475) ([Tomer Shafir](https://github.com/tomershafir)).
+* Analyzer: fix `max_streams_to_max_threads_ratio` setting overflow. [#63478](https://github.com/ClickHouse/ClickHouse/pull/63478) ([Maksim Kita](https://github.com/kitaisreal)).
+* Provides the setting `output_format_pretty_preserve_border_for_multiline_string`, which allows rendering multiline strings in Pretty formats better (see the sketch below). The default value for this setting is true. [#63479](https://github.com/ClickHouse/ClickHouse/pull/63479) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
+* Fix a logical error when reloading a config with a custom-created web disk, broken after [#56367](https://github.com/ClickHouse/ClickHouse/issues/56367). [#63484](https://github.com/ClickHouse/ClickHouse/pull/63484) ([Kseniia Sumarokova](https://github.com/kssenii)).
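+
+A small illustrative sketch for `output_format_pretty_preserve_border_for_multiline_string` from [#63479](https://github.com/ClickHouse/ClickHouse/pull/63479) (the exact rendered output depends on the client, so treat this as an assumption-laden example rather than the PR's own test):
+
+```sql
+-- With the setting enabled (the default), the right-hand table border
+-- stays aligned even when a value spans multiple lines.
+SELECT 'first line\nsecond line' AS s
+SETTINGS output_format_pretty_preserve_border_for_multiline_string = 1
+FORMAT PrettyCompact;
+```
+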
+* Add a test for [#49307](https://github.com/ClickHouse/ClickHouse/issues/49307). [#63486](https://github.com/ClickHouse/ClickHouse/pull/63486) ([Anton Popov](https://github.com/CurtizJ)).
+* Remove leftovers of GCC support in cmake rules. [#63488](https://github.com/ClickHouse/ClickHouse/pull/63488) ([Azat Khuzhin](https://github.com/azat)).
+* Fix ProfileEventTimeIncrement code. [#63489](https://github.com/ClickHouse/ClickHouse/pull/63489) ([Azat Khuzhin](https://github.com/azat)).
+* MergeTreePrefetchedReadPool: Print the parent name when logging projection parts. [#63522](https://github.com/ClickHouse/ClickHouse/pull/63522) ([Raúl Marín](https://github.com/Algunenano)).
+* Correctly stop `asyncCopy` tasks in all cases. [#63523](https://github.com/ClickHouse/ClickHouse/pull/63523) ([Antonio Andelic](https://github.com/antonio2368)).
+* Almost everything should work on AArch64 (part of [#58061](https://github.com/ClickHouse/ClickHouse/issues/58061)). [#63527](https://github.com/ClickHouse/ClickHouse/pull/63527) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Update randomization of `old_parts_lifetime`. [#63530](https://github.com/ClickHouse/ClickHouse/pull/63530) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* Update 02240_system_filesystem_cache_table.sh. [#63531](https://github.com/ClickHouse/ClickHouse/pull/63531) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Fix a data race in `DistributedSink`. [#63538](https://github.com/ClickHouse/ClickHouse/pull/63538) ([Antonio Andelic](https://github.com/antonio2368)).
+* Fix the Azure tests run on master. [#63540](https://github.com/ClickHouse/ClickHouse/pull/63540) ([alesapin](https://github.com/alesapin)).
+* The commit 2b8254f987a65d5c21d74fe67b4ee9757970466e was not synced into the cloud because it was falsely marked as a success by `upstream_pr.head.sha`. Here we'll try our best to find a proper commit, and won't do anything if we can't. [#63543](https://github.com/ClickHouse/ClickHouse/pull/63543) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
+* Add the `no-s3-storage` tag to the local_plain_rewritable unit test. [#63546](https://github.com/ClickHouse/ClickHouse/pull/63546) ([Julia Kartseva](https://github.com/jkartseva)).
+* Add `jwcrypto` to the integration tests runner. [#63551](https://github.com/ClickHouse/ClickHouse/pull/63551) ([Konstantin Bogdanov](https://github.com/thevar1able)).
+* Go back to upstream lz4. [#63574](https://github.com/ClickHouse/ClickHouse/pull/63574) ([Raúl Marín](https://github.com/Algunenano)).
+* Fix a logical error in ColumnTuple::tryInsert(). [#63583](https://github.com/ClickHouse/ClickHouse/pull/63583) ([Michael Kolupaev](https://github.com/al13n321)).
+* Harmonize `sumMap` error messages on ILLEGAL_TYPE_OF_ARGUMENT. [#63619](https://github.com/ClickHouse/ClickHouse/pull/63619) ([Yohann Jardin](https://github.com/yohannj)).
+* Refactor the data part writer to remove dependencies on MergeTreeData and DataPart. [#63620](https://github.com/ClickHouse/ClickHouse/pull/63620) ([Alexander Gololobov](https://github.com/davenger)).
+* Update README.md. [#63631](https://github.com/ClickHouse/ClickHouse/pull/63631) ([Tyler Hannan](https://github.com/tylerhannan)).
+* Ignore the global profiler if `system.trace_log` is not enabled, and really disable it for the Keeper standalone build. [#63632](https://github.com/ClickHouse/ClickHouse/pull/63632) ([Azat Khuzhin](https://github.com/azat)).
+* Fixes for 00002_log_and_exception_messages_formatting.
[#63634](https://github.com/ClickHouse/ClickHouse/pull/63634) ([Azat Khuzhin](https://github.com/azat)). +* Fix 02362_part_log_merge_algorithm flaky test. [#63635](https://github.com/ClickHouse/ClickHouse/pull/63635) ([Miсhael Stetsyuk](https://github.com/mstetsyuk)). +* Fix tests flakiness due to long SYSTEM FLUSH LOGS (explicitly specify old_parts_lifetime). [#63639](https://github.com/ClickHouse/ClickHouse/pull/63639) ([Azat Khuzhin](https://github.com/azat)). +* Update clickhouse-test help section. [#63663](https://github.com/ClickHouse/ClickHouse/pull/63663) ([Ali](https://github.com/xogoodnow)). +* Fix bad test `02950_part_log_bytes_uncompressed`. [#63672](https://github.com/ClickHouse/ClickHouse/pull/63672) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove leftovers of `optimize_monotonous_functions_in_order_by`. [#63674](https://github.com/ClickHouse/ClickHouse/pull/63674) ([Nikita Taranov](https://github.com/nickitat)). +* tests: attempt to fix 02340_parts_refcnt_mergetree flakiness. [#63684](https://github.com/ClickHouse/ClickHouse/pull/63684) ([Azat Khuzhin](https://github.com/azat)). +* Parallel replicas: simple cleanup. [#63685](https://github.com/ClickHouse/ClickHouse/pull/63685) ([Igor Nikonov](https://github.com/devcrafter)). +* Cancel S3 reads properly when parallel reads are used. [#63687](https://github.com/ClickHouse/ClickHouse/pull/63687) ([Antonio Andelic](https://github.com/antonio2368)). +* Explaining insertion order of the Map datatype. [#63690](https://github.com/ClickHouse/ClickHouse/pull/63690) ([Mark Needham](https://github.com/mneedham)). +* selectRangesToRead() simple cleanup. [#63692](https://github.com/ClickHouse/ClickHouse/pull/63692) ([Igor Nikonov](https://github.com/devcrafter)). +* Fix fuzzed analyzer_join_with_constant query. [#63702](https://github.com/ClickHouse/ClickHouse/pull/63702) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Add missing explicit instantiations of ColumnUnique. [#63718](https://github.com/ClickHouse/ClickHouse/pull/63718) ([Raúl Marín](https://github.com/Algunenano)). +* Better asserts in ColumnString.h. [#63719](https://github.com/ClickHouse/ClickHouse/pull/63719) ([Raúl Marín](https://github.com/Algunenano)). +* Try to fix flaky s3 tests test_seekable_formats and test_seekable_formats_url. [#63720](https://github.com/ClickHouse/ClickHouse/pull/63720) ([Kruglov Pavel](https://github.com/Avogar)). +* Don't randomize some settings in 02941_variant_type_* tests to avoid timeouts. [#63721](https://github.com/ClickHouse/ClickHouse/pull/63721) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix flaky 03145_non_loaded_projection_backup.sh. [#63728](https://github.com/ClickHouse/ClickHouse/pull/63728) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Userspace page cache: don't collect stats if cache is unused. [#63730](https://github.com/ClickHouse/ClickHouse/pull/63730) ([Michael Kolupaev](https://github.com/al13n321)). +* Fix insignificant UBSAN error in QueryAnalyzer::replaceNodesWithPositionalArguments(). [#63734](https://github.com/ClickHouse/ClickHouse/pull/63734) ([Michael Kolupaev](https://github.com/al13n321)). +* Fix a bug in resolving matcher inside lambda inside ARRAY JOIN. [#63744](https://github.com/ClickHouse/ClickHouse/pull/63744) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Self explanatory. [#63754](https://github.com/ClickHouse/ClickHouse/pull/63754) ([Arthur Passos](https://github.com/arthurpassos)). +* Do not hide disk name. 
[#63756](https://github.com/ClickHouse/ClickHouse/pull/63756) ([Kseniia Sumarokova](https://github.com/kssenii)). +* CI: remove Cancel and Debug workflows as redundant. [#63757](https://github.com/ClickHouse/ClickHouse/pull/63757) ([Max K.](https://github.com/maxknv)). +* Security Policy: Add notification process. [#63773](https://github.com/ClickHouse/ClickHouse/pull/63773) ([Leticia Webb](https://github.com/leticiawebb)). +* Fix typo. [#63774](https://github.com/ClickHouse/ClickHouse/pull/63774) ([Anton Popov](https://github.com/CurtizJ)). +* Fix fuzzer when only explicit faults are used. [#63775](https://github.com/ClickHouse/ClickHouse/pull/63775) ([Raúl Marín](https://github.com/Algunenano)). +* Settings typo. [#63782](https://github.com/ClickHouse/ClickHouse/pull/63782) ([Rory Crispin](https://github.com/RoryCrispin)). +* Ref. [#63479](https://github.com/ClickHouse/ClickHouse/issues/63479). [#63783](https://github.com/ClickHouse/ClickHouse/pull/63783) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* Fix test_odbc_interaction on aarch64 [#61457](https://github.com/ClickHouse/ClickHouse/issues/61457). [#63787](https://github.com/ClickHouse/ClickHouse/pull/63787) ([alesapin](https://github.com/alesapin)). +* Fix test `test_catboost_evaluate` for aarch64. [#61457](https://github.com/ClickHouse/ClickHouse/issues/61457). [#63789](https://github.com/ClickHouse/ClickHouse/pull/63789) ([alesapin](https://github.com/alesapin)). +* Rewrite plan for parallel replicas in Planner. [#63796](https://github.com/ClickHouse/ClickHouse/pull/63796) ([Igor Nikonov](https://github.com/devcrafter)). +* Follow-up for the `binary_symbols` table in CI. [#63802](https://github.com/ClickHouse/ClickHouse/pull/63802) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Support INSERT with VALUES in the ANTLR syntax file. [#63811](https://github.com/ClickHouse/ClickHouse/pull/63811) ([GG Bond](https://github.com/zzyReal666)). +* Fix race in `ReplicatedMergeTreeLogEntryData`. [#63816](https://github.com/ClickHouse/ClickHouse/pull/63816) ([Antonio Andelic](https://github.com/antonio2368)). +* Allow allocation during job destructor in `ThreadPool`. [#63829](https://github.com/ClickHouse/ClickHouse/pull/63829) ([Antonio Andelic](https://github.com/antonio2368)). +* Remove HDFS from disks config for one integration test for arm. [#61457](https://github.com/ClickHouse/ClickHouse/issues/61457). [#63832](https://github.com/ClickHouse/ClickHouse/pull/63832) ([alesapin](https://github.com/alesapin)). +* io_uring: add basic io_uring clickhouse perf test. [#63835](https://github.com/ClickHouse/ClickHouse/pull/63835) ([Tomer Shafir](https://github.com/tomershafir)). +* Bump version for old image in test_short_strings_aggregation to make it work on arm. [#61457](https://github.com/ClickHouse/ClickHouse/issues/61457). [#63836](https://github.com/ClickHouse/ClickHouse/pull/63836) ([alesapin](https://github.com/alesapin)). +* Fix typo. [#63838](https://github.com/ClickHouse/ClickHouse/pull/63838) ([Alexander Gololobov](https://github.com/davenger)). +* Disable test `test_non_default_compression/test.py::test_preconfigured_deflateqpl_codec` on arm. [#61457](https://github.com/ClickHouse/ClickHouse/issues/61457). [#63839](https://github.com/ClickHouse/ClickHouse/pull/63839) ([alesapin](https://github.com/alesapin)). +* This PR was reverted. [#63857](https://github.com/ClickHouse/ClickHouse/pull/63857) ([Sema Checherinda](https://github.com/CheSema)).
+* Remove unnecessary logging statements in MergeJoinTransform.cpp. [#63860](https://github.com/ClickHouse/ClickHouse/pull/63860) ([vdimir](https://github.com/vdimir)). +* Temporarily disables 3 integration test cases on arm until https://github.com/clickhouse/clickhouse/issues/63855 is resolved. [#63867](https://github.com/ClickHouse/ClickHouse/pull/63867) ([Max K.](https://github.com/maxknv)). +* Fix some settings values in 02455_one_row_from_csv_memory_usage test to make it less flaky. [#63874](https://github.com/ClickHouse/ClickHouse/pull/63874) ([Kruglov Pavel](https://github.com/Avogar)). +* Randomise `allow_experimental_parallel_reading_from_replicas` in stress tests. [#63899](https://github.com/ClickHouse/ClickHouse/pull/63899) ([Nikita Taranov](https://github.com/nickitat)). +* Fix logs test for binary data by converting it to a valid UTF8 string. [#63909](https://github.com/ClickHouse/ClickHouse/pull/63909) ([Alexey Katsman](https://github.com/alexkats)). +* More sanity checks for parallel replicas. [#63910](https://github.com/ClickHouse/ClickHouse/pull/63910) ([Nikita Taranov](https://github.com/nickitat)). +* Include checks like `Stateless tests (asan, distributed cache, meta storage in keeper, s3 storage) [2/3]` in `Mergeable Check` and `A Sync`. [#63945](https://github.com/ClickHouse/ClickHouse/pull/63945) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Insignificant libunwind build fixes. [#63946](https://github.com/ClickHouse/ClickHouse/pull/63946) ([Azat Khuzhin](https://github.com/azat)). +* Revert multiline pretty changes due to performance problems. [#63947](https://github.com/ClickHouse/ClickHouse/pull/63947) ([Raúl Marín](https://github.com/Algunenano)). +* Some usability improvements for c++expr script. [#63948](https://github.com/ClickHouse/ClickHouse/pull/63948) ([Azat Khuzhin](https://github.com/azat)). +* Fix 02124_insert_deduplication_token_multiple_blocks. [#63950](https://github.com/ClickHouse/ClickHouse/pull/63950) ([Han Fei](https://github.com/hanfei1991)). +* CI: aarch64: disable arm integration tests with kerberaized kafka. [#63961](https://github.com/ClickHouse/ClickHouse/pull/63961) ([Max K.](https://github.com/maxknv)). +* Make events like [timeouts](https://play.clickhouse.com/play?user=play#U0VMRUNUICogRlJPTSBjaGVja3MgV0hFUkUgdGVzdF9uYW1lID09ICdDaGVjayB0aW1lb3V0IGV4cGlyZWQnIEFORCBjaGVja19zdGFydF90aW1lIEJFVFdFRU4gdG9EYXRlKCcyMDI0LTA1LTEwJykgQU5EIHRvRGF0ZSgnMjAyNC0wNS0xNScp) visible in the CI DB (the linked query is decoded in the sketch after this list). [#63982](https://github.com/ClickHouse/ClickHouse/pull/63982) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Slightly better setting `force_optimize_projection_name`. [#63997](https://github.com/ClickHouse/ClickHouse/pull/63997) ([Anton Popov](https://github.com/CurtizJ)). +* chore(ci-workers): remove reusable from tailscale key. [#63999](https://github.com/ClickHouse/ClickHouse/pull/63999) ([Gabriel Martinez](https://github.com/GMartinez-Sisti)). +* Better script to collect symbols statistics. [#64013](https://github.com/ClickHouse/ClickHouse/pull/64013) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix a typo in Analyzer. [#64022](https://github.com/ClickHouse/ClickHouse/pull/64022) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix libbcrypt for FreeBSD build. [#64023](https://github.com/ClickHouse/ClickHouse/pull/64023) ([Azat Khuzhin](https://github.com/azat)). +* Remove some unnecessary `UNREACHABLE`s. [#64035](https://github.com/ClickHouse/ClickHouse/pull/64035) ([Robert Schulze](https://github.com/rschu1ze)).
+* Add `ClickHouseVersion.copy` method. Create a release branch in advance without spinning out the release, to increase stability. [#64039](https://github.com/ClickHouse/ClickHouse/pull/64039) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix searching for libclang_rt.builtins.*.a on FreeBSD. [#64051](https://github.com/ClickHouse/ClickHouse/pull/64051) ([Azat Khuzhin](https://github.com/azat)). +* The MIME type is not 100% reliable for Python and shell scripts without shebangs; add a check for the file extension. [#64062](https://github.com/ClickHouse/ClickHouse/pull/64062) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix waiting for mutations with retriable errors. [#64063](https://github.com/ClickHouse/ClickHouse/pull/64063) ([Alexander Tokmakov](https://github.com/tavplubix)). +* harmonize h3PointDist* error messages. [#64080](https://github.com/ClickHouse/ClickHouse/pull/64080) ([Yohann Jardin](https://github.com/yohannj)). +* This log message is better in Trace. [#64081](https://github.com/ClickHouse/ClickHouse/pull/64081) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Prevent stack overflow in Fuzzer and Stress test. [#64082](https://github.com/ClickHouse/ClickHouse/pull/64082) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* tests: fix expected error for 03036_reading_s3_archives (fixes CI). [#64089](https://github.com/ClickHouse/ClickHouse/pull/64089) ([Azat Khuzhin](https://github.com/azat)). +* Fix sanitizers. [#64090](https://github.com/ClickHouse/ClickHouse/pull/64090) ([Azat Khuzhin](https://github.com/azat)). +* Update llvm/clang to 18.1.6. [#64091](https://github.com/ClickHouse/ClickHouse/pull/64091) ([Azat Khuzhin](https://github.com/azat)). +* Set green Mergeable Check status only after all required checks have passed with success. All non-required checks are started at stage Test_3 when all required checks have passed in Test_1/2. [#64093](https://github.com/ClickHouse/ClickHouse/pull/64093) ([Max K.](https://github.com/maxknv)). +* Move `isAllASCII` from UTFHelper to StringUtils. [#64108](https://github.com/ClickHouse/ClickHouse/pull/64108) ([Robert Schulze](https://github.com/rschu1ze)). +* Throw out some `inline`s. [#64110](https://github.com/ClickHouse/ClickHouse/pull/64110) ([Robert Schulze](https://github.com/rschu1ze)). +* Clean up .clang-tidy after transition to Clang 18. [#64111](https://github.com/ClickHouse/ClickHouse/pull/64111) ([Robert Schulze](https://github.com/rschu1ze)). +* Ignore exception when checking for cgroupsv2. [#64118](https://github.com/ClickHouse/ClickHouse/pull/64118) ([Robert Schulze](https://github.com/rschu1ze)). +* Add retries in git submodule update. [#64125](https://github.com/ClickHouse/ClickHouse/pull/64125) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* See https://s3.amazonaws.com/clickhouse-test-reports/63946/86cf1e13d866333b8a511badd7f2fe186d810646/ast_fuzzer__ubsan_.html. [#64127](https://github.com/ClickHouse/ClickHouse/pull/64127) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Refactoring of Server.h: Isolate server management from other logic. [#64132](https://github.com/ClickHouse/ClickHouse/pull/64132) ([TTPO100AJIEX](https://github.com/TTPO100AJIEX)). +* Syncing code. [#64135](https://github.com/ClickHouse/ClickHouse/pull/64135) ([Antonio Andelic](https://github.com/antonio2368)). +* Loosen build resource limits for unusual architectures.
[#64152](https://github.com/ClickHouse/ClickHouse/pull/64152) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix clang-tidy. [#64179](https://github.com/ClickHouse/ClickHouse/pull/64179) ([Han Fei](https://github.com/hanfei1991)). +* Fix: 02124_insert_deduplication_token_multiple_blocks_replica. [#64181](https://github.com/ClickHouse/ClickHouse/pull/64181) ([Igor Nikonov](https://github.com/devcrafter)). +* Fix global query profiler. [#64187](https://github.com/ClickHouse/ClickHouse/pull/64187) ([Azat Khuzhin](https://github.com/azat)). +* CI: cancel running PR wf after adding to MQ. [#64188](https://github.com/ClickHouse/ClickHouse/pull/64188) ([Max K.](https://github.com/maxknv)). +* Add profile events for number of rows read during/after prewhere. [#64198](https://github.com/ClickHouse/ClickHouse/pull/64198) ([Nikita Taranov](https://github.com/nickitat)). +* Add debug logging to EmbeddedRocksDBBulkSink. [#64203](https://github.com/ClickHouse/ClickHouse/pull/64203) ([vdimir](https://github.com/vdimir)). +* Fix special builds (due to excessive resource usage - memory/CPU). [#64204](https://github.com/ClickHouse/ClickHouse/pull/64204) ([Azat Khuzhin](https://github.com/azat)). +* Update InterpreterCreateQuery.cpp. [#64207](https://github.com/ClickHouse/ClickHouse/pull/64207) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Remove unused storage_snapshot field from MergeTreeSelectProcessor. [#64217](https://github.com/ClickHouse/ClickHouse/pull/64217) ([Alexander Gololobov](https://github.com/davenger)). +* Add test for [#37090](https://github.com/ClickHouse/ClickHouse/issues/37090). [#64220](https://github.com/ClickHouse/ClickHouse/pull/64220) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* Small cli tool. [#64227](https://github.com/ClickHouse/ClickHouse/pull/64227) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Make `settings_changes_history` const. [#64230](https://github.com/ClickHouse/ClickHouse/pull/64230) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* test for [#45804](https://github.com/ClickHouse/ClickHouse/issues/45804). [#64245](https://github.com/ClickHouse/ClickHouse/pull/64245) ([Denny Crane](https://github.com/den-crane)). +* Update version after release. [#64283](https://github.com/ClickHouse/ClickHouse/pull/64283) ([Raúl Marín](https://github.com/Algunenano)). +* Followup for [#63691](https://github.com/ClickHouse/ClickHouse/issues/63691). [#64285](https://github.com/ClickHouse/ClickHouse/pull/64285) ([vdimir](https://github.com/vdimir)). +* CI: dependency fix for changelog.py. [#64293](https://github.com/ClickHouse/ClickHouse/pull/64293) ([Max K.](https://github.com/maxknv)). +* Print query in explain plan with parallel replicas. [#64298](https://github.com/ClickHouse/ClickHouse/pull/64298) ([vdimir](https://github.com/vdimir)). +* CI: Cancel sync wf on new push. [#64299](https://github.com/ClickHouse/ClickHouse/pull/64299) ([Max K.](https://github.com/maxknv)). +* CI: master workflow with folded jobs. [#64340](https://github.com/ClickHouse/ClickHouse/pull/64340) ([Max K.](https://github.com/maxknv)). +* CI: Sync, Merge check, CI gh's statuses fixes. [#64348](https://github.com/ClickHouse/ClickHouse/pull/64348) ([Max K.](https://github.com/maxknv)). +* Enable 02494_query_cache_nested_query_bug for Analyzer. [#64357](https://github.com/ClickHouse/ClickHouse/pull/64357) ([Robert Schulze](https://github.com/rschu1ze)). +* Rename allow_deprecated_functions to allow_deprecated_error_prone_window_functions.
[#64358](https://github.com/ClickHouse/ClickHouse/pull/64358) ([Raúl Marín](https://github.com/Algunenano)). +* Change input_format_parquet_use_native_reader to 24.6. [#64359](https://github.com/ClickHouse/ClickHouse/pull/64359) ([Raúl Marín](https://github.com/Algunenano)). +* Update description for settings `cross_join_min_rows_to_compress` and `cross_join_min_bytes_to_compress`. [#64360](https://github.com/ClickHouse/ClickHouse/pull/64360) ([Nikita Fomichev](https://github.com/fm4v)). +* Changed the unreleased setting `aggregate_function_group_array_has_limit_size` to `aggregate_function_group_array_action_when_limit_is_reached`. [#64362](https://github.com/ClickHouse/ClickHouse/pull/64362) ([Raúl Marín](https://github.com/Algunenano)). +* Split tests 03039_dynamic_all_merge_algorithms to avoid timeouts. [#64363](https://github.com/ClickHouse/ClickHouse/pull/64363) ([Kruglov Pavel](https://github.com/Avogar)). +* Try to fix GWPAsan. [#64365](https://github.com/ClickHouse/ClickHouse/pull/64365) ([Antonio Andelic](https://github.com/antonio2368)). +* CI: add secrets to reusable stage wf yml. [#64366](https://github.com/ClickHouse/ClickHouse/pull/64366) ([Max K.](https://github.com/maxknv)). +* Do not run tests tagged 'no-s3-storage-with-slow-build' with ASan. [#64367](https://github.com/ClickHouse/ClickHouse/pull/64367) ([vdimir](https://github.com/vdimir)). +* This change was reverted. [#64386](https://github.com/ClickHouse/ClickHouse/pull/64386) ([Francisco J. Jurado Moreno](https://github.com/Beetelbrox)). +* Update s3queue.md. [#64389](https://github.com/ClickHouse/ClickHouse/pull/64389) ([Kseniia Sumarokova](https://github.com/kssenii)). +* test for [#64211](https://github.com/ClickHouse/ClickHouse/issues/64211). [#64390](https://github.com/ClickHouse/ClickHouse/pull/64390) ([Denny Crane](https://github.com/den-crane)). +* Follow-up to [#59767](https://github.com/ClickHouse/ClickHouse/issues/59767). [#64398](https://github.com/ClickHouse/ClickHouse/pull/64398) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Remove wrong comment. [#64403](https://github.com/ClickHouse/ClickHouse/pull/64403) ([Sergei Trifonov](https://github.com/serxa)). +* Follow up to [#59767](https://github.com/ClickHouse/ClickHouse/issues/59767). [#64404](https://github.com/ClickHouse/ClickHouse/pull/64404) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Refactor s3 settings (move settings parsing into single place). [#64412](https://github.com/ClickHouse/ClickHouse/pull/64412) ([Kseniia Sumarokova](https://github.com/kssenii)). +* This PR was reverted. [#64423](https://github.com/ClickHouse/ClickHouse/pull/64423) ([Sergei Trifonov](https://github.com/serxa)). +* Fix test after [#64404](https://github.com/ClickHouse/ClickHouse/issues/64404). [#64432](https://github.com/ClickHouse/ClickHouse/pull/64432) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Faster TestKeeper shutdown. [#64433](https://github.com/ClickHouse/ClickHouse/pull/64433) ([Alexander Gololobov](https://github.com/davenger)). +* Remove some logging. [#64434](https://github.com/ClickHouse/ClickHouse/pull/64434) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Revert "Revert "Remove some unnecessary UNREACHABLEs"". [#64435](https://github.com/ClickHouse/ClickHouse/pull/64435) ([Robert Schulze](https://github.com/rschu1ze)). +* Clean settings in 02943_variant_read_subcolumns test. [#64437](https://github.com/ClickHouse/ClickHouse/pull/64437) ([Kruglov Pavel](https://github.com/Avogar)). 
+* Add a comment after [#64226](https://github.com/ClickHouse/ClickHouse/issues/64226). [#64449](https://github.com/ClickHouse/ClickHouse/pull/64449) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* CI: fix build_report selection in case of job reuse. [#64459](https://github.com/ClickHouse/ClickHouse/pull/64459) ([Max K.](https://github.com/maxknv)). +* Add Critical bugfix category in PR template. [#64480](https://github.com/ClickHouse/ClickHouse/pull/64480) ([Max K.](https://github.com/maxknv)). +* Remove `generateSnowflakeIDThreadMonotonic`. [#64499](https://github.com/ClickHouse/ClickHouse/pull/64499) ([Robert Schulze](https://github.com/rschu1ze)). +* Move analyzer attempt 2. [#64500](https://github.com/ClickHouse/ClickHouse/pull/64500) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Sync some code back from internal to public repository. [#64502](https://github.com/ClickHouse/ClickHouse/pull/64502) ([Robert Schulze](https://github.com/rschu1ze)). +* Remove `generateUUIDv7(NonMonotonic|ThreadMonotonic)` functions. [#64506](https://github.com/ClickHouse/ClickHouse/pull/64506) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix bash completion for settings. [#64521](https://github.com/ClickHouse/ClickHouse/pull/64521) ([Azat Khuzhin](https://github.com/azat)). +* Use max_read_buffer_size for file descriptors as well in file(). [#64532](https://github.com/ClickHouse/ClickHouse/pull/64532) ([Azat Khuzhin](https://github.com/azat)). +* Temporarily disable `enable_vertical_final` setting by default. This feature should not be used in older releases because it [might crash](https://github.com/ClickHouse/ClickHouse/issues/64543), but it's already fixed in 24.6 where this setting change has been reverted and `enable_vertical_final` is again enabled by default. [#64544](https://github.com/ClickHouse/ClickHouse/pull/64544) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Removed excessive calls to `flush logs` and disabled it under sanitizers. [#64550](https://github.com/ClickHouse/ClickHouse/pull/64550) ([Nikita Taranov](https://github.com/nickitat)). +* Sync code moved in private repo back to public repo. [#64551](https://github.com/ClickHouse/ClickHouse/pull/64551) ([Robert Schulze](https://github.com/rschu1ze)). +* Add support for a custom type in ASTLiteral, or else the type may be lost when parsing the AST. E.g. set an ASTLiteral to DateTime32 with value 19870; it will then be parsed as Int16. [#64562](https://github.com/ClickHouse/ClickHouse/pull/64562) ([shuai.xu](https://github.com/shuai-xu)). +* Add a temporary known host for git over ssh. [#64569](https://github.com/ClickHouse/ClickHouse/pull/64569) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Cache first analysis result in ReadFromMergeTree. [#64579](https://github.com/ClickHouse/ClickHouse/pull/64579) ([Igor Nikonov](https://github.com/devcrafter)). +* Derive script parameters (labels) from --repo/--from-repo; a fix to not create backports for all release branches when a backport is requested for a specific branch only. [#64603](https://github.com/ClickHouse/ClickHouse/pull/64603) ([Max K.](https://github.com/maxknv)). +* CI fixes. [#64605](https://github.com/ClickHouse/ClickHouse/pull/64605) ([Max K.](https://github.com/maxknv)). +* Double-checking [#59318](https://github.com/ClickHouse/ClickHouse/issues/59318) and docs for `Map`. [#64606](https://github.com/ClickHouse/ClickHouse/pull/64606) ([Robert Schulze](https://github.com/rschu1ze)). +* Update CHANGELOG.md.
[#64609](https://github.com/ClickHouse/ClickHouse/pull/64609) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Tests: Convert numeric to symbolic error codes. [#64635](https://github.com/ClickHouse/ClickHouse/pull/64635) ([Robert Schulze](https://github.com/rschu1ze)). +* Move NamedCollectionsFactory into a separate file. [#64642](https://github.com/ClickHouse/ClickHouse/pull/64642) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Shuffle tests for parallel execution. [#64646](https://github.com/ClickHouse/ClickHouse/pull/64646) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* CI: Do not upload binaries for special builds in PRs. [#64653](https://github.com/ClickHouse/ClickHouse/pull/64653) ([Max K.](https://github.com/maxknv)). +* Update changelog. [#64654](https://github.com/ClickHouse/ClickHouse/pull/64654) ([Robert Schulze](https://github.com/rschu1ze)). +* Parallel replicas: simple cleanup. [#64655](https://github.com/ClickHouse/ClickHouse/pull/64655) ([Igor Nikonov](https://github.com/devcrafter)). +* Be more graceful with existing tables with `inverted` indexes. [#64656](https://github.com/ClickHouse/ClickHouse/pull/64656) ([Robert Schulze](https://github.com/rschu1ze)). +* CI: Build Report Check to verify only enabled builds. [#64669](https://github.com/ClickHouse/ClickHouse/pull/64669) ([Max K.](https://github.com/maxknv)). +* Tests: Convert error numbers to symbolic error codes, pt. II. [#64670](https://github.com/ClickHouse/ClickHouse/pull/64670) ([Robert Schulze](https://github.com/rschu1ze)). +* Split query analyzer. [#64672](https://github.com/ClickHouse/ClickHouse/pull/64672) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* By the end of CI, the CI_Running status must be SUCCESS or FAILURE, never PENDING. [#64693](https://github.com/ClickHouse/ClickHouse/pull/64693) ([Max K.](https://github.com/maxknv)). +* The following list of merged PRs is not present in the release branch and was added to the changelog by mistake. [#64704](https://github.com/ClickHouse/ClickHouse/pull/64704) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* CI: MergeQueue: add binary_release and unit tests. [#64705](https://github.com/ClickHouse/ClickHouse/pull/64705) ([Max K.](https://github.com/maxknv)). +* Fix to get the first good-enough GH token instead of getting and comparing all of them. [#64709](https://github.com/ClickHouse/ClickHouse/pull/64709) ([Max K.](https://github.com/maxknv)). +* Check for missing Upload ID in CreateMultipartUpload reply. [#64714](https://github.com/ClickHouse/ClickHouse/pull/64714) ([Michael Kolupaev](https://github.com/al13n321)). +* Update version_date.tsv and changelogs after v24.5.1.1763-stable. [#64715](https://github.com/ClickHouse/ClickHouse/pull/64715) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Fix (unreleased) `loop()` table function crashing on empty table name. [#64716](https://github.com/ClickHouse/ClickHouse/pull/64716) ([Michael Kolupaev](https://github.com/al13n321)). +* Update CHANGELOG.md. [#64730](https://github.com/ClickHouse/ClickHouse/pull/64730) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* CI: ci.py refactoring. [#64734](https://github.com/ClickHouse/ClickHouse/pull/64734) ([Max K.](https://github.com/maxknv)). +* Return the explanation for session moved error. [#64747](https://github.com/ClickHouse/ClickHouse/pull/64747) ([Antonio Andelic](https://github.com/antonio2368)). +* Adjust the version_helper and script to a new release scheme.
[#64759](https://github.com/ClickHouse/ClickHouse/pull/64759) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Do not try to write columns.txt if it does not exist for write-once storages. [#64762](https://github.com/ClickHouse/ClickHouse/pull/64762) ([Azat Khuzhin](https://github.com/azat)). +* Update 02482_load_parts_refcounts.sh. [#64765](https://github.com/ClickHouse/ClickHouse/pull/64765) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix crash with DISTINCT and window functions. [#64767](https://github.com/ClickHouse/ClickHouse/pull/64767) ([Igor Nikonov](https://github.com/devcrafter)). +* Fix assert in IObjectStorageIteratorAsync. [#64770](https://github.com/ClickHouse/ClickHouse/pull/64770) ([Michael Kolupaev](https://github.com/al13n321)). +* Make table functions always report engine 'StorageProxy' in system.tables. [#64771](https://github.com/ClickHouse/ClickHouse/pull/64771) ([Michael Kolupaev](https://github.com/al13n321)). +* Ask about company name on GitHub. [#64774](https://github.com/ClickHouse/ClickHouse/pull/64774) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix flaky tests about SQLite. [#64776](https://github.com/ClickHouse/ClickHouse/pull/64776) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove iostream debug helpers. [#64777](https://github.com/ClickHouse/ClickHouse/pull/64777) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove unnecessary comment. [#64785](https://github.com/ClickHouse/ClickHouse/pull/64785) ([Raúl Marín](https://github.com/Algunenano)). +* Follow-ups to some PRs. [#64787](https://github.com/ClickHouse/ClickHouse/pull/64787) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Attempt to fix 02228_merge_tree_insert_memory_usage.sql flakiness for s3. [#64800](https://github.com/ClickHouse/ClickHouse/pull/64800) ([Raúl Marín](https://github.com/Algunenano)). +* Add regression test for filter propagation through `Merge` engine. [#64806](https://github.com/ClickHouse/ClickHouse/pull/64806) ([Nikita Taranov](https://github.com/nickitat)). +* Migrate changelog.py to a descendant of fuzzywuzzy. [#64807](https://github.com/ClickHouse/ClickHouse/pull/64807) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* A follow-up for https://github.com/ClickHouse/ClickHouse/pull/64039 and [#64759](https://github.com/ClickHouse/ClickHouse/issues/64759). [#64813](https://github.com/ClickHouse/ClickHouse/pull/64813) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Make row order optimization non-experimental. [#64814](https://github.com/ClickHouse/ClickHouse/pull/64814) ([Robert Schulze](https://github.com/rschu1ze)). +* Didn't catch it at the time when all versions belonged to the current year. [#64817](https://github.com/ClickHouse/ClickHouse/pull/64817) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix clang-tidy build. [#64823](https://github.com/ClickHouse/ClickHouse/pull/64823) ([Robert Schulze](https://github.com/rschu1ze)). +* Sets all builds that we run tests on to the normal build list. [#64824](https://github.com/ClickHouse/ClickHouse/pull/64824) ([Max K.](https://github.com/maxknv)). +* CI: fix CI await feature. [#64825](https://github.com/ClickHouse/ClickHouse/pull/64825) ([Max K.](https://github.com/maxknv)). +* Fix clang-tidy. [#64827](https://github.com/ClickHouse/ClickHouse/pull/64827) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Upload blob_storage_log from stateless tests.
[#64843](https://github.com/ClickHouse/ClickHouse/pull/64843) ([alesapin](https://github.com/alesapin)). +* Follow-up to [#64349](https://github.com/ClickHouse/ClickHouse/issues/64349). [#64845](https://github.com/ClickHouse/ClickHouse/pull/64845) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Simplify handling of old 'inverted' indexes. [#64846](https://github.com/ClickHouse/ClickHouse/pull/64846) ([Robert Schulze](https://github.com/rschu1ze)). +* Use issue templates defined in YAML to provide a more user-friendly experience. [#64850](https://github.com/ClickHouse/ClickHouse/pull/64850) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Handle logs from rocksdb via ClickHouse's internal logging. [#64856](https://github.com/ClickHouse/ClickHouse/pull/64856) ([Azat Khuzhin](https://github.com/azat)). +* Follow-up for https://github.com/ClickHouse/ClickHouse/pull/59357. [#64860](https://github.com/ClickHouse/ClickHouse/pull/64860) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Added mlock and mlockall to aspell-dict so they are ignored. [#64863](https://github.com/ClickHouse/ClickHouse/pull/64863) ([Ali](https://github.com/xogoodnow)). +* A tiny fix for fancy quotes. [#64883](https://github.com/ClickHouse/ClickHouse/pull/64883) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix possible loss of "Query was cancelled" message in client. [#64888](https://github.com/ClickHouse/ClickHouse/pull/64888) ([Azat Khuzhin](https://github.com/azat)). +* We accidentally lost the way to set `PR Check` failure at some point. [#64890](https://github.com/ClickHouse/ClickHouse/pull/64890) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix global trace collector. [#64896](https://github.com/ClickHouse/ClickHouse/pull/64896) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix test_mask_sensitive_info/test.py::test_create_table. [#64901](https://github.com/ClickHouse/ClickHouse/pull/64901) ([Azat Khuzhin](https://github.com/azat)). +* Update 03165_string_functions_with_token_text_indexes.sql. [#64903](https://github.com/ClickHouse/ClickHouse/pull/64903) ([Alexander Tokmakov](https://github.com/tavplubix)). +* When the branch is removed, it's impossible to get the diff by the labels. `print` in imported files spoils the `ipython` output. [#64904](https://github.com/ClickHouse/ClickHouse/pull/64904) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Disable transactions for unsupported storages even for materialized v…. [#64918](https://github.com/ClickHouse/ClickHouse/pull/64918) ([alesapin](https://github.com/alesapin)). +* additional log for cleanupDetachedTables. [#64919](https://github.com/ClickHouse/ClickHouse/pull/64919) ([Konstantin Morozov](https://github.com/k-morozov)). +* Fix tupleConcat of two empty tuples. This fixes [#64885](https://github.com/ClickHouse/ClickHouse/issues/64885). [#64923](https://github.com/ClickHouse/ClickHouse/pull/64923) ([Amos Bird](https://github.com/amosbird)). +* CI: Minor fixes in ci scripts. [#64950](https://github.com/ClickHouse/ClickHouse/pull/64950) ([Max K.](https://github.com/maxknv)). +* Fix error message (it was strange). [#64952](https://github.com/ClickHouse/ClickHouse/pull/64952) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Update fmtlib version to 9.1.0. [#64959](https://github.com/ClickHouse/ClickHouse/pull/64959) ([Duc Canh Le](https://github.com/canhld94)). +* Test 02908_many_requests_to_system_replicas makes a lot of heavy requests and it overloads the server if it's an ASAN build.
[#64966](https://github.com/ClickHouse/ClickHouse/pull/64966) ([Alexander Gololobov](https://github.com/davenger)). +* Fix (unreleased) bug in short-circuit evaluation. [#64967](https://github.com/ClickHouse/ClickHouse/pull/64967) ([Raúl Marín](https://github.com/Algunenano)). +* Update version_date.tsv and changelogs after v24.4.2.141-stable. [#64968](https://github.com/ClickHouse/ClickHouse/pull/64968) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Fix `test_attach_partition_using_copy`. [#64977](https://github.com/ClickHouse/ClickHouse/pull/64977) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Faster processing of scheduler queue activations. [#64985](https://github.com/ClickHouse/ClickHouse/pull/64985) ([Sergei Trifonov](https://github.com/serxa)). +* CI: Fix nightly workflow. [#64987](https://github.com/ClickHouse/ClickHouse/pull/64987) ([Max K.](https://github.com/maxknv)). +* Fix innocuous data race in detectLanguage. [#64988](https://github.com/ClickHouse/ClickHouse/pull/64988) ([Raúl Marín](https://github.com/Algunenano)). +* CI: Builds in CI settings. [#64994](https://github.com/ClickHouse/ClickHouse/pull/64994) ([Max K.](https://github.com/maxknv)). +* REVERTED. [#65009](https://github.com/ClickHouse/ClickHouse/pull/65009) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* CI: Fix backports. [#65010](https://github.com/ClickHouse/ClickHouse/pull/65010) ([Max K.](https://github.com/maxknv)). +* Try fix 03143_prewhere_profile_events. [#65014](https://github.com/ClickHouse/ClickHouse/pull/65014) ([Nikita Taranov](https://github.com/nickitat)). +* Fix 03165_string_functions_with_token_text_indexes. [#65018](https://github.com/ClickHouse/ClickHouse/pull/65018) ([Julia Kartseva](https://github.com/jkartseva)). +* This change was reverted. [#65028](https://github.com/ClickHouse/ClickHouse/pull/65028) ([Sergei Trifonov](https://github.com/serxa)). +* Bump googletest to latest HEAD. [#65038](https://github.com/ClickHouse/ClickHouse/pull/65038) ([Robert Schulze](https://github.com/rschu1ze)). +* Improve comment about AsynchronousMetrics. [#65040](https://github.com/ClickHouse/ClickHouse/pull/65040) ([Antonio Andelic](https://github.com/antonio2368)). +* CI: Remove fuzzer build from normal CI run (bugfix). [#65041](https://github.com/ClickHouse/ClickHouse/pull/65041) ([Max K.](https://github.com/maxknv)). +* CI config refactoring. [#65045](https://github.com/ClickHouse/ClickHouse/pull/65045) ([Max K.](https://github.com/maxknv)). +* Bump abseil to latest HEAD. [#65048](https://github.com/ClickHouse/ClickHouse/pull/65048) ([Robert Schulze](https://github.com/rschu1ze)). +* Capture weak_ptr of ContextAccess for safety. [#65051](https://github.com/ClickHouse/ClickHouse/pull/65051) ([Alexander Gololobov](https://github.com/davenger)). +* Stateless tests: add test for SIZES_OF_NESTED_COLUMNS_ARE_INCONSISTENT. [#65056](https://github.com/ClickHouse/ClickHouse/pull/65056) ([Nikita Fomichev](https://github.com/fm4v)). +* Increase timeout in wait_for_all_mutations. [#65058](https://github.com/ClickHouse/ClickHouse/pull/65058) ([Alexander Gololobov](https://github.com/davenger)). +* Tests for the _time virtual column in file-like storages. [#65064](https://github.com/ClickHouse/ClickHouse/pull/65064) ([Ilya Golshtein](https://github.com/ilejn)). +* Update odbc-bridge.md. [#65099](https://github.com/ClickHouse/ClickHouse/pull/65099) ([Alexander Gololobov](https://github.com/davenger)). +* Small fix for 02340_parts_refcnt_mergetree.
[#65105](https://github.com/ClickHouse/ClickHouse/pull/65105) ([Nikita Taranov](https://github.com/nickitat)). +* Re-enable OpenSSL session caching. [#65111](https://github.com/ClickHouse/ClickHouse/pull/65111) ([Robert Schulze](https://github.com/rschu1ze)). +* Update test_replicated_database/test.py. [#65112](https://github.com/ClickHouse/ClickHouse/pull/65112) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix false-positive memory leak warnings in OpenSSL. [#65125](https://github.com/ClickHouse/ClickHouse/pull/65125) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix `Initiator received more initial requests than there are replicas` with `loop` engine. [#65133](https://github.com/ClickHouse/ClickHouse/pull/65133) ([Nikita Taranov](https://github.com/nickitat)). +* Fix 'Tasks in BackgroundSchedulePool cannot throw' caused by MergeTreeData::loadUnexpectedDataParts(). [#65135](https://github.com/ClickHouse/ClickHouse/pull/65135) ([Michael Kolupaev](https://github.com/al13n321)). +* Fix bad error message. [#65137](https://github.com/ClickHouse/ClickHouse/pull/65137) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Just fixing flaky unit tests. [#65152](https://github.com/ClickHouse/ClickHouse/pull/65152) ([Sema Checherinda](https://github.com/CheSema)). +* This change was reverted. [#65164](https://github.com/ClickHouse/ClickHouse/pull/65164) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Ensure submodules are named consistently. [#65167](https://github.com/ClickHouse/ClickHouse/pull/65167) ([Robert Schulze](https://github.com/rschu1ze)). +* Remove obsolete fix from aws submodule. [#65168](https://github.com/ClickHouse/ClickHouse/pull/65168) ([Robert Schulze](https://github.com/rschu1ze)). +* CI: Fix not-merged cherry-picks for backports. [#65181](https://github.com/ClickHouse/ClickHouse/pull/65181) ([Max K.](https://github.com/maxknv)). +* Add an assertion in ReplicatedMergeTreeQueue. [#65184](https://github.com/ClickHouse/ClickHouse/pull/65184) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix bug in unreleased code. [#65185](https://github.com/ClickHouse/ClickHouse/pull/65185) ([Raúl Marín](https://github.com/Algunenano)). +* Fix docs for skipping-indexes.md. [#65194](https://github.com/ClickHouse/ClickHouse/pull/65194) ([morning-color](https://github.com/morning-color)). +* Fix the descriptions of some server settings. [#65200](https://github.com/ClickHouse/ClickHouse/pull/65200) ([Raúl Marín](https://github.com/Algunenano)). +* Fix issue after [#64813](https://github.com/ClickHouse/ClickHouse/issues/64813) with broken search in the changelog, and missing zstd in a style-check image. [#65202](https://github.com/ClickHouse/ClickHouse/pull/65202) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix bug in unreleased code. [#65203](https://github.com/ClickHouse/ClickHouse/pull/65203) ([Raúl Marín](https://github.com/Algunenano)). +* Add test prewhere merge. [#65207](https://github.com/ClickHouse/ClickHouse/pull/65207) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Sync ProfileEvents.h. [#65208](https://github.com/ClickHouse/ClickHouse/pull/65208) ([Kseniia Sumarokova](https://github.com/kssenii)). +* FinishCheck to set failure if workflow failed. [#65228](https://github.com/ClickHouse/ClickHouse/pull/65228) ([Max K.](https://github.com/maxknv)). +* Update version_date.tsv and changelogs after v24.3.4.147-lts.
[#65235](https://github.com/ClickHouse/ClickHouse/pull/65235) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Update version_date.tsv and changelogs after v24.5.3.5-stable. [#65240](https://github.com/ClickHouse/ClickHouse/pull/65240) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Sometimes fails for debug builds: https://s3.amazonaws.com/clickhouse-test-reports/0/af6afd904316bfb771737faa147ce8aea72dd705/stateless_tests__debug__[4_5].html. [#65245](https://github.com/ClickHouse/ClickHouse/pull/65245) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix libunwind in CI. [#65247](https://github.com/ClickHouse/ClickHouse/pull/65247) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* CI: Do not skip FinishCheck in Merge Queue. [#65249](https://github.com/ClickHouse/ClickHouse/pull/65249) ([Max K.](https://github.com/maxknv)). +* Add a test just in case. [#65271](https://github.com/ClickHouse/ClickHouse/pull/65271) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Disable 02581_share_big_sets_between_multiple_mutations_tasks_long in coverage run. [#65295](https://github.com/ClickHouse/ClickHouse/pull/65295) ([Alexander Gololobov](https://github.com/davenger)). +* Update version_date.tsv and changelogs after v23.8.15.35-lts. [#65300](https://github.com/ClickHouse/ClickHouse/pull/65300) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* mute test test_query_is_canceled_with_inf_retries. [#65301](https://github.com/ClickHouse/ClickHouse/pull/65301) ([Sema Checherinda](https://github.com/CheSema)). +* Fix silly typo that caused wrong tags messages. [#65307](https://github.com/ClickHouse/ClickHouse/pull/65307) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Save server data for failed stateless tests. [#65309](https://github.com/ClickHouse/ClickHouse/pull/65309) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix 01246_buffer_flush flakiness (by tuning timeouts). [#65310](https://github.com/ClickHouse/ClickHouse/pull/65310) ([Azat Khuzhin](https://github.com/azat)). +* Remove outdated override in stress tests. [#65323](https://github.com/ClickHouse/ClickHouse/pull/65323) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix bad code in `system.session_log`. [#65332](https://github.com/ClickHouse/ClickHouse/pull/65332) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add tests for 'bloom filter index with map'. [#65333](https://github.com/ClickHouse/ClickHouse/pull/65333) ([iceFireser](https://github.com/iceFireser)). +* Fix crash in 03036_dynamic_read_subcolumns. [#65341](https://github.com/ClickHouse/ClickHouse/pull/65341) ([Kruglov Pavel](https://github.com/Avogar)). +* Move tests 02942_variant_cast and 02944_variant_as_common_type to analyzer_tech_debt.txt. [#65342](https://github.com/ClickHouse/ClickHouse/pull/65342) ([Kruglov Pavel](https://github.com/Avogar)). +* REVERTED. [#65384](https://github.com/ClickHouse/ClickHouse/pull/65384) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* CI: Add Non-blocking (Woolen wolfdog) CI mode. [#65385](https://github.com/ClickHouse/ClickHouse/pull/65385) ([Max K.](https://github.com/maxknv)). +* Fix compatibility release check. [#65394](https://github.com/ClickHouse/ClickHouse/pull/65394) ([Alexey Katsman](https://github.com/alexkats)). +* Move a leaksan suppression from Poco into OpenSSL. [#65396](https://github.com/ClickHouse/ClickHouse/pull/65396) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix tidy build.
[#65415](https://github.com/ClickHouse/ClickHouse/pull/65415) ([Sergei Trifonov](https://github.com/serxa)). +* Remove Tests dependency on Builds_2. No tests depend on Builds_2. [#65416](https://github.com/ClickHouse/ClickHouse/pull/65416) ([Max K.](https://github.com/maxknv)). +* CI: PR workflow dependencies fix. [#65442](https://github.com/ClickHouse/ClickHouse/pull/65442) ([Max K.](https://github.com/maxknv)). +* Fix test_storage_s3_queue/test.py::test_max_set_age. [#65452](https://github.com/ClickHouse/ClickHouse/pull/65452) ([Kseniia Sumarokova](https://github.com/kssenii)). +* CI: Rename A Sync status. [#65456](https://github.com/ClickHouse/ClickHouse/pull/65456) ([Max K.](https://github.com/maxknv)). +* CI: Rename sync status. [#65464](https://github.com/ClickHouse/ClickHouse/pull/65464) ([Max K.](https://github.com/maxknv)). +* This change was reverted. [#65466](https://github.com/ClickHouse/ClickHouse/pull/65466) ([Sergei Trifonov](https://github.com/serxa)). +* Remove a feature that wasn't part of any release yet. [#65480](https://github.com/ClickHouse/ClickHouse/pull/65480) ([Raúl Marín](https://github.com/Algunenano)). +* Backported in [#65657](https://github.com/ClickHouse/ClickHouse/issues/65657): Fix of `PlanSquashingTransform`: pipeline stuck. [#65487](https://github.com/ClickHouse/ClickHouse/pull/65487) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* Backported in [#65504](https://github.com/ClickHouse/ClickHouse/issues/65504): Fix bad test `02922_deduplication_with_zero_copy`. [#65492](https://github.com/ClickHouse/ClickHouse/pull/65492) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#65591](https://github.com/ClickHouse/ClickHouse/issues/65591): Setting `uniform_snowflake_conversion_functions` (not in any release yet) was replaced by setting `allow_deprecated_snowflake_conversion_functions`. The latter controls if the legacy snowflake conversion functions are available (by default, they are not). [#65522](https://github.com/ClickHouse/ClickHouse/pull/65522) ([Robert Schulze](https://github.com/rschu1ze)). +* Backported in [#65759](https://github.com/ClickHouse/ClickHouse/issues/65759): Renames Build report jobs. [#65554](https://github.com/ClickHouse/ClickHouse/pull/65554) ([Max K.](https://github.com/maxknv)). +* Backported in [#65773](https://github.com/ClickHouse/ClickHouse/issues/65773): `base64En/Decode64Url` --> `base64En/Decode64URL` (see the sketch after this list). [#65760](https://github.com/ClickHouse/ClickHouse/pull/65760) ([Robert Schulze](https://github.com/rschu1ze)). +* Backported in [#65805](https://github.com/ClickHouse/ClickHouse/issues/65805): CI: Fix for Builds report job in backports and releases. [#65774](https://github.com/ClickHouse/ClickHouse/pull/65774) ([Max K.](https://github.com/maxknv)).
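For context, the `timeouts` link in the CI-DB entry above carries the query base64-encoded in its URL fragment; decoded, it is simply the following (a sketch, assuming the public `checks` table of the CI results database):

```sql
SELECT *
FROM checks
WHERE test_name == 'Check timeout expired'
  AND check_start_time BETWEEN toDate('2024-05-10') AND toDate('2024-05-15')
```

Likewise, the `base64En/Decode64Url` rename and the snowflake-functions setting noted above can be illustrated like this (a sketch; the encoded value assumes standard URL-safe base64 without padding):

```sql
SELECT base64URLEncode('https://clickhouse.com');         -- formerly base64UrlEncode
SELECT base64URLDecode('aHR0cHM6Ly9jbGlja2hvdXNlLmNvbQ'); -- formerly base64UrlDecode
SET allow_deprecated_snowflake_conversion_functions = 1;  -- re-enables the legacy snowflake conversion functions
```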
+ diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv index 1bff4cb0b09..b274905b0f2 100644 --- a/utils/list-versions/version_date.tsv +++ b/utils/list-versions/version_date.tsv @@ -1,6 +1,8 @@ +v24.6.1.4423-stable 2024-07-01 v24.5.3.5-stable 2024-06-13 v24.5.2.34-stable 2024-06-13 v24.5.1.1763-stable 2024-06-01 +v24.4.3.25-stable 2024-06-14 v24.4.2.141-stable 2024-06-07 v24.4.1.2088-stable 2024-05-01 v24.3.4.147-lts 2024-06-13 From b1b9aaf2ec77d4098a49760dab3b70994d6fa3f4 Mon Sep 17 00:00:00 2001 From: Anton Kozlov Date: Mon, 3 Jun 2024 15:38:40 +0000 Subject: [PATCH 070/140] Add support for x509 SubjectAltName identification --- .../external-authenticators/ssl-x509.md | 15 ++- .../en/sql-reference/statements/alter/user.md | 2 +- .../sql-reference/statements/create/user.md | 2 +- .../external-authenticators/ssl-x509.md | 19 ++-- .../sql-reference/statements/create/user.md | 2 +- src/Access/Authentication.cpp | 11 +- src/Access/AuthenticationData.cpp | 31 ++++-- src/Access/AuthenticationData.h | 10 +- src/Access/Common/SSLCertificateSubjects.cpp | 95 +++++++++++++++++ src/Access/Common/SSLCertificateSubjects.h | 48 +++++++++ src/Access/Credentials.cpp | 10 +- src/Access/Credentials.h | 8 +- src/Access/UsersConfigAccessStorage.cpp | 12 ++- src/Parsers/Access/ASTAuthenticationData.cpp | 2 +- src/Parsers/Access/ASTAuthenticationData.h | 1 + src/Parsers/Access/ASTCreateUserQuery.cpp | 1 + src/Parsers/Access/ParserCreateUserQuery.cpp | 30 ++++-- src/Parsers/CommonParsers.h | 1 + src/Server/HTTPHandler.cpp | 14 +-- src/Server/TCPHandler.cpp | 2 +- src/Storages/System/StorageSystemUsers.cpp | 20 +++- .../certs/ca-cert.pem | 56 +++++----- .../certs/ca-cert.srl | 2 +- .../certs/ca-key.pem | 100 +++++++++--------- .../certs/client1-cert.pem | 52 ++++----- .../certs/client1-key.pem | 100 +++++++++--------- .../certs/client1-req.pem | 46 ++++---- .../certs/client2-cert.pem | 52 ++++----- .../certs/client2-key.pem | 100 +++++++++--------- .../certs/client2-req.pem | 46 ++++---- .../certs/client3-cert.pem | 52 ++++----- .../certs/client3-key.pem | 100 +++++++++--------- .../certs/client3-req.pem | 46 ++++---- .../certs/client4-cert.pem | 31 ++++++ .../certs/client4-ext.cnf | 1 + .../certs/client4-key.pem | 52 +++++++++ .../certs/client4-req.pem | 27 +++++ .../certs/generate_certs.sh | 2 + .../certs/server-cert.pem | 56 +++++----- .../certs/server-key.pem | 100 +++++++++--------- .../certs/server-req.pem | 46 ++++---- .../certs/wrong-cert.pem | 54 +++++----- .../certs/wrong-key.pem | 100 +++++++++--------- .../configs/users_with_ssl_auth.xml | 6 ++ .../test_ssl_cert_authentication/test.py | 36 +++++++ 45 files changed, 983 insertions(+), 616 deletions(-) create mode 100644 src/Access/Common/SSLCertificateSubjects.cpp create mode 100644 src/Access/Common/SSLCertificateSubjects.h create mode 100644 tests/integration/test_ssl_cert_authentication/certs/client4-cert.pem create mode 100644 tests/integration/test_ssl_cert_authentication/certs/client4-ext.cnf create mode 100644 tests/integration/test_ssl_cert_authentication/certs/client4-key.pem create mode 100644 tests/integration/test_ssl_cert_authentication/certs/client4-req.pem diff --git a/docs/en/operations/external-authenticators/ssl-x509.md b/docs/en/operations/external-authenticators/ssl-x509.md index 109913c2b18..09fac45d7ae 100644 --- a/docs/en/operations/external-authenticators/ssl-x509.md +++ b/docs/en/operations/external-authenticators/ssl-x509.md @@ -6,23 +6,30 @@ import SelfManaged from 
'@site/docs/en/_snippets/_self_managed_only_no_roadmap.m -[SSL 'strict' option](../server-configuration-parameters/settings.md#server_configuration_parameters-openssl) enables mandatory certificate validation for the incoming connections. In this case, only connections with trusted certificates can be established. Connections with untrusted certificates will be rejected. Thus, certificate validation allows to uniquely authenticate an incoming connection. `Common Name` field of the certificate is used to identify connected user. This allows to associate multiple certificates with the same user. Additionally, reissuing and revoking of the certificates does not affect the ClickHouse configuration. +[SSL 'strict' option](../server-configuration-parameters/settings.md#server_configuration_parameters-openssl) enables mandatory certificate validation for incoming connections. In this case, only connections with trusted certificates can be established. Connections with untrusted certificates will be rejected. Thus, certificate validation allows an incoming connection to be uniquely authenticated. The `Common Name` or `subjectAltName extension` field of the certificate is used to identify the connected user. This allows associating multiple certificates with the same user. Additionally, reissuing and revoking certificates does not affect the ClickHouse configuration. -To enable SSL certificate authentication, a list of `Common Name`'s for each ClickHouse user must be specified in the settings file `users.xml `: +To enable SSL certificate authentication, a list of `Common Name`s or `Subject Alt Name`s for each ClickHouse user must be specified in the settings file `users.xml`: **Example** ```xml
<clickhouse>
    <users>
        <user_name>
            <ssl_certificates>
                <common_name>host.domain.com:example_user</common_name>
                <common_name>host.domain.com:example_user_dev</common_name>
            </ssl_certificates>
        </user_name>
        <user_name_2>
            <ssl_certificates>
                <subject_alt_name>DNS:host.domain.com</subject_alt_name>
            </ssl_certificates>
        </user_name_2>
    </users>
</clickhouse>
``` diff --git a/docs/en/sql-reference/statements/alter/user.md b/docs/en/sql-reference/statements/alter/user.md index b5c156f56a9..6216b83c2ef 100644 --- a/docs/en/sql-reference/statements/alter/user.md +++ b/docs/en/sql-reference/statements/alter/user.md @@ -12,7 +12,7 @@ Syntax: ``` sql ALTER USER [IF EXISTS] name1 [ON CLUSTER cluster_name1] [RENAME TO new_name1] [, name2 [ON CLUSTER cluster_name2] [RENAME TO new_name2] ...] - [NOT IDENTIFIED | IDENTIFIED {[WITH {no_password | plaintext_password | sha256_password | sha256_hash | double_sha1_password | double_sha1_hash}] BY {'password' | 'hash'}} | {WITH ldap SERVER 'server_name'} | {WITH kerberos [REALM 'realm']} | {WITH ssl_certificate CN 'common_name'}] + [NOT IDENTIFIED | IDENTIFIED {[WITH {no_password | plaintext_password | sha256_password | sha256_hash | double_sha1_password | double_sha1_hash}] BY {'password' | 'hash'}} | {WITH ldap SERVER 'server_name'} | {WITH kerberos [REALM 'realm']} | {WITH ssl_certificate CN 'common_name' | SAN 'TYPE:subject_alt_name'}] [[ADD | DROP] HOST {LOCAL | NAME 'name' | REGEXP 'name_regexp' | IP 'address' | LIKE 'pattern'} [,...] | ANY | NONE] [VALID UNTIL datetime] [DEFAULT ROLE role [,...] | ALL | ALL EXCEPT role [,...] ] diff --git a/docs/en/sql-reference/statements/create/user.md b/docs/en/sql-reference/statements/create/user.md index aee98cfcd10..8c9143ee086 100644 --- a/docs/en/sql-reference/statements/create/user.md +++ b/docs/en/sql-reference/statements/create/user.md @@ -12,7 +12,7 @@ Syntax: ``` sql CREATE USER [IF NOT EXISTS | OR REPLACE] name1 [ON CLUSTER cluster_name1] [, name2 [ON CLUSTER cluster_name2] ...]
- [NOT IDENTIFIED | IDENTIFIED {[WITH {no_password | plaintext_password | sha256_password | sha256_hash | double_sha1_password | double_sha1_hash}] BY {'password' | 'hash'}} | {WITH ldap SERVER 'server_name'} | {WITH kerberos [REALM 'realm']} | {WITH ssl_certificate CN 'common_name'} | {WITH ssh_key BY KEY 'public_key' TYPE 'ssh-rsa|...'} | {WITH http SERVER 'server_name' [SCHEME 'Basic']}] + [NOT IDENTIFIED | IDENTIFIED {[WITH {no_password | plaintext_password | sha256_password | sha256_hash | double_sha1_password | double_sha1_hash}] BY {'password' | 'hash'}} | {WITH ldap SERVER 'server_name'} | {WITH kerberos [REALM 'realm']} | {WITH ssl_certificate CN 'common_name' | SAN 'TYPE:subject_alt_name'} | {WITH ssh_key BY KEY 'public_key' TYPE 'ssh-rsa|...'} | {WITH http SERVER 'server_name' [SCHEME 'Basic']}] [HOST {LOCAL | NAME 'name' | REGEXP 'name_regexp' | IP 'address' | LIKE 'pattern'} [,...] | ANY | NONE] [VALID UNTIL datetime] [IN access_storage_type] diff --git a/docs/ru/operations/external-authenticators/ssl-x509.md b/docs/ru/operations/external-authenticators/ssl-x509.md index affdf87b199..7f1fb03962c 100644 --- a/docs/ru/operations/external-authenticators/ssl-x509.md +++ b/docs/ru/operations/external-authenticators/ssl-x509.md @@ -3,23 +3,30 @@ slug: /ru/operations/external-authenticators/ssl-x509 --- # Аутентификация по сертификату SSL X.509 {#ssl-external-authentication} -[Опция 'strict'](../server-configuration-parameters/settings.md#server_configuration_parameters-openssl) включает обязательную проверку сертификатов входящих соединений в библиотеке `SSL`. В этом случае могут быть установлены только соединения, представившие действительный сертификат. Соединения с недоверенными сертификатами будут отвергнуты. Таким образом, проверка сертификата позволяет однозначно аутентифицировать входящее соединение. Идентификация пользователя осуществляется по полю `Common Name` сертификата. Это позволяет ассоциировать несколько сертификатов с одним и тем же пользователем. Дополнительно, перевыпуск и отзыв сертификата не требуют изменения конфигурации ClickHouse. +[Опция 'strict'](../server-configuration-parameters/settings.md#server_configuration_parameters-openssl) включает обязательную проверку сертификатов входящих соединений в библиотеке `SSL`. В этом случае могут быть установлены только соединения, представившие действительный сертификат. Соединения с недоверенными сертификатами будут отвергнуты. Таким образом, проверка сертификата позволяет однозначно аутентифицировать входящее соединение. Идентификация пользователя осуществляется по полю `Common Name` или `subjectAltName` сертификата. Это позволяет ассоциировать несколько сертификатов с одним и тем же пользователем. Дополнительно, перевыпуск и отзыв сертификата не требуют изменения конфигурации ClickHouse. 
diff --git a/docs/ru/operations/external-authenticators/ssl-x509.md b/docs/ru/operations/external-authenticators/ssl-x509.md
index affdf87b199..7f1fb03962c 100644
--- a/docs/ru/operations/external-authenticators/ssl-x509.md
+++ b/docs/ru/operations/external-authenticators/ssl-x509.md
@@ -3,23 +3,30 @@ slug: /ru/operations/external-authenticators/ssl-x509
 ---
 # SSL X.509 certificate authentication {#ssl-external-authentication}
 
-[The 'strict' option](../server-configuration-parameters/settings.md#server_configuration_parameters-openssl) enables mandatory validation of the certificates of incoming connections in the `SSL` library. In this case, only connections that present a valid certificate can be established. Connections with untrusted certificates are rejected. Thus, certificate validation makes it possible to unambiguously authenticate an incoming connection. The user is identified by the `Common Name` field of the certificate. This makes it possible to associate several certificates with the same user. Additionally, reissuing or revoking a certificate does not require changing the ClickHouse configuration.
+[The 'strict' option](../server-configuration-parameters/settings.md#server_configuration_parameters-openssl) enables mandatory validation of the certificates of incoming connections in the `SSL` library. In this case, only connections that present a valid certificate can be established. Connections with untrusted certificates are rejected. Thus, certificate validation makes it possible to unambiguously authenticate an incoming connection. The user is identified by the `Common Name` or `subjectAltName` field of the certificate. This makes it possible to associate several certificates with the same user. Additionally, reissuing or revoking a certificate does not require changing the ClickHouse configuration.
 
-To enable SSL certificate authentication, a list of `Common Name` values must be specified for every ClickHouse user in the configuration file `config.xml`:
+To enable SSL certificate authentication, a list of `Common Name` or `subjectAltName` values must be specified for every ClickHouse user in the configuration file `config.xml`:
 
 **Example**
 ```xml
 <clickhouse>
     <!- ... -->
     <users>
-        <user_name>
-            <certificates>
+        <user_name_1>
+            <ssl_certificates>
                 <common_name>host.domain.com:example_user</common_name>
                 <common_name>host.domain.com:example_user_dev</common_name>
                 <!-- More names -->
-            </certificates>
+            </ssl_certificates>
             <!-- Other settings -->
-        </user_name>
+        </user_name_1>
+        <user_name_2>
+            <ssl_certificates>
+                <subject_alt_name>DNS:host.domain.com</subject_alt_name>
+                <!-- More names -->
+            </ssl_certificates>
+            <!-- Other settings -->
+        </user_name_2>
     </users>
 </clickhouse>
 ```
diff --git a/docs/ru/sql-reference/statements/create/user.md b/docs/ru/sql-reference/statements/create/user.md
index 76cfdb251dc..fac2cacf8cc 100644
--- a/docs/ru/sql-reference/statements/create/user.md
+++ b/docs/ru/sql-reference/statements/create/user.md
@@ -13,7 +13,7 @@ sidebar_label: "User"
 ``` sql
 CREATE USER [IF NOT EXISTS | OR REPLACE] name1 [ON CLUSTER cluster_name1]
         [, name2 [ON CLUSTER cluster_name2] ...]
-    [NOT IDENTIFIED | IDENTIFIED {[WITH {no_password | plaintext_password | sha256_password | sha256_hash | double_sha1_password | double_sha1_hash}] BY {'password' | 'hash'}} | {WITH ldap SERVER 'server_name'} | {WITH kerberos [REALM 'realm']} | {WITH ssl_certificate CN 'common_name'} | {WITH ssh_key BY KEY 'public_key' TYPE 'ssh-rsa|...'}]
+    [NOT IDENTIFIED | IDENTIFIED {[WITH {no_password | plaintext_password | sha256_password | sha256_hash | double_sha1_password | double_sha1_hash}] BY {'password' | 'hash'}} | {WITH ldap SERVER 'server_name'} | {WITH kerberos [REALM 'realm']} | {WITH ssl_certificate CN 'common_name' | SAN 'TYPE:subject_alt_name'} | {WITH ssh_key BY KEY 'public_key' TYPE 'ssh-rsa|...'}]
     [HOST {LOCAL | NAME 'name' | REGEXP 'name_regexp' | IP 'address' | LIKE 'pattern'} [,...] | ANY | NONE]
     [DEFAULT ROLE role [,...]]
     [DEFAULT DATABASE database | NONE]
diff --git a/src/Access/Authentication.cpp b/src/Access/Authentication.cpp
index bf1fe3feec3..7bd22626f80 100644
--- a/src/Access/Authentication.cpp
+++ b/src/Access/Authentication.cpp
@@ -8,6 +8,7 @@
 #include
 #include
 #include
+#include <Access/Common/SSLCertificateSubjects.h>
 
 #include "config.h"
 
@@ -226,7 +227,15 @@ bool Authentication::areCredentialsValid(
             throw Authentication::Require<GSSAcceptorContext>(auth_data.getKerberosRealm());
 
         case AuthenticationType::SSL_CERTIFICATE:
-            return auth_data.getSSLCertificateCommonNames().contains(ssl_certificate_credentials->getCommonName());
+            for (SSLCertificateSubjects::Type type : {SSLCertificateSubjects::Type::CN, SSLCertificateSubjects::Type::SAN})
+            {
+                for (const auto & subject : auth_data.getSSLCertificateSubjects().at(type))
+                {
+                    if (ssl_certificate_credentials->getSSLCertificateSubjects().at(type).contains(subject))
+                        return true;
+                }
+            }
+            return false;
 
         case AuthenticationType::SSH_KEY:
 #if USE_SSH
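The new `SSL_CERTIFICATE` branch of `areCredentialsValid` above boils down to a per-type set-intersection test. A self-contained sketch of that rule, with `std::set<std::string>` standing in for the patch's `flat_set`-based `SSLCertificateSubjects` (a simplification for illustration, not the actual class):

```cpp
#include <array>
#include <cstddef>
#include <set>
#include <string>

enum class SubjectType : std::size_t { CN = 0, SAN = 1 };

using SubjectSet = std::set<std::string>;
using Subjects = std::array<SubjectSet, 2>; // one set of subjects per SubjectType

// A peer is authenticated if any subject configured for the user is also
// presented by the certificate under the same type -- an exact, case-sensitive
// string match, just like flat_set::contains in the hunk above.
bool credentialsValid(const Subjects & configured, const Subjects & presented)
{
    for (auto type : {SubjectType::CN, SubjectType::SAN})
    {
        const auto & wanted = configured[static_cast<std::size_t>(type)];
        const auto & given = presented[static_cast<std::size_t>(type)];
        for (const auto & subject : wanted)
            if (given.contains(subject))
                return true;
    }
    return false;
}

int main()
{
    Subjects configured{SubjectSet{}, SubjectSet{"DNS:host.domain.com"}};
    Subjects presented{SubjectSet{"host.domain.com:example_user"}, SubjectSet{"DNS:host.domain.com"}};
    return credentialsValid(configured, presented) ? 0 : 1; // exits 0: the SAN entries intersect
}
```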
diff --git a/src/Access/AuthenticationData.cpp b/src/Access/AuthenticationData.cpp
index a32215f3d92..b975551f59f 100644
--- a/src/Access/AuthenticationData.cpp
+++ b/src/Access/AuthenticationData.cpp
@@ -15,6 +15,7 @@
 #include
 #include
+#include <Access/Common/SSLCertificateSubjects.h>
 #include "config.h"
 
 #if USE_SSL
@@ -104,7 +105,7 @@ bool operator ==(const AuthenticationData & lhs, const AuthenticationData & rhs)
 {
     return (lhs.type == rhs.type) && (lhs.password_hash == rhs.password_hash)
         && (lhs.ldap_server_name == rhs.ldap_server_name) && (lhs.kerberos_realm == rhs.kerberos_realm)
-        && (lhs.ssl_certificate_common_names == rhs.ssl_certificate_common_names)
+        && (lhs.ssl_certificate_subjects == rhs.ssl_certificate_subjects)
 #if USE_SSH
         && (lhs.ssh_keys == rhs.ssh_keys)
 #endif
@@ -261,11 +262,16 @@ String AuthenticationData::getSalt() const
     return salt;
 }
 
-void AuthenticationData::setSSLCertificateCommonNames(boost::container::flat_set<String> common_names_)
+void AuthenticationData::setSSLCertificateSubjects(SSLCertificateSubjects && ssl_certificate_subjects_)
 {
-    if (common_names_.empty())
-        throw Exception(ErrorCodes::BAD_ARGUMENTS, "The 'SSL CERTIFICATE' authentication type requires a non-empty list of common names.");
-    ssl_certificate_common_names = std::move(common_names_);
+    if (ssl_certificate_subjects_.empty())
+        throw Exception(ErrorCodes::BAD_ARGUMENTS, "The 'SSL CERTIFICATE' authentication type requires a non-empty list of subjects.");
+    ssl_certificate_subjects = std::move(ssl_certificate_subjects_);
+}
+
+void AuthenticationData::addSSLCertificateSubject(SSLCertificateSubjects::Type type_, String && subject_)
+{
+    ssl_certificate_subjects.insert(type_, std::move(subject_));
 }
 
 std::shared_ptr<ASTAuthenticationData> AuthenticationData::toAST() const
@@ -319,7 +325,14 @@ std::shared_ptr<ASTAuthenticationData> AuthenticationData::toAST() const
     }
     case AuthenticationType::SSL_CERTIFICATE:
     {
-        for (const auto & name : getSSLCertificateCommonNames())
+        using SSLCertificateSubjects::Type::CN;
+        using SSLCertificateSubjects::Type::SAN;
+
+        const auto & subjects = getSSLCertificateSubjects();
+        SSLCertificateSubjects::Type cert_subject_type = !subjects.at(SAN).empty() ? SAN : CN;
+
+        node->ssl_cert_subject_type = toString(cert_subject_type);
+        for (const auto & name : getSSLCertificateSubjects().at(cert_subject_type))
             node->children.push_back(std::make_shared<ASTLiteral>(name));
 
         break;
@@ -493,11 +506,9 @@ AuthenticationData AuthenticationData::fromAST(const ASTAuthenticationData & query)
     }
     else if (query.type == AuthenticationType::SSL_CERTIFICATE)
    {
-        boost::container::flat_set<String> common_names;
+        auto ssl_cert_subject_type = parseSSLCertificateSubjectType(*query.ssl_cert_subject_type);
         for (const auto & arg : args)
-            common_names.insert(checkAndGetLiteralArgument<String>(arg, "common_name"));
-
-        auth_data.setSSLCertificateCommonNames(std::move(common_names));
+            auth_data.addSSLCertificateSubject(ssl_cert_subject_type, checkAndGetLiteralArgument<String>(arg, "ssl_certificate_subject"));
     }
     else if (query.type == AuthenticationType::HTTP)
     {
diff --git a/src/Access/AuthenticationData.h b/src/Access/AuthenticationData.h
index c97e0327b56..8093fe1d888 100644
--- a/src/Access/AuthenticationData.h
+++ b/src/Access/AuthenticationData.h
@@ -2,13 +2,14 @@
 #include
 #include
+#include <Access/Common/SSLCertificateSubjects.h>
 #include
 #include
 #include
 #include
 #include
-#include <boost/container/flat_set.hpp>
+
 #include "config.h"
 
@@ -58,8 +59,9 @@ public:
     const String & getKerberosRealm() const { return kerberos_realm; }
     void setKerberosRealm(const String & realm) { kerberos_realm = realm; }
 
-    const boost::container::flat_set<String> & getSSLCertificateCommonNames() const { return ssl_certificate_common_names; }
-    void setSSLCertificateCommonNames(boost::container::flat_set<String> common_names_);
+    const SSLCertificateSubjects & getSSLCertificateSubjects() const { return ssl_certificate_subjects; }
+    void setSSLCertificateSubjects(SSLCertificateSubjects && ssl_certificate_subjects_);
+    void addSSLCertificateSubject(SSLCertificateSubjects::Type type_, String && subject_);
 
 #if USE_SSH
     const std::vector<SSHKey> & getSSHKeys() const { return ssh_keys; }
@@ -96,7 +98,7 @@ private:
     Digest password_hash;
     String ldap_server_name;
     String kerberos_realm;
-    boost::container::flat_set<String> ssl_certificate_common_names;
+    SSLCertificateSubjects ssl_certificate_subjects;
     String salt;
 #if USE_SSH
     std::vector<SSHKey> ssh_keys;
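Two user-visible effects of the storage change above: `toAST` now serializes whichever subject type is stored (preferring SAN when both are present), and `system.users` reports the subjects in separate JSON arrays (see the `StorageSystemUsers.cpp` hunk further down). An illustration of how that could surface; the exact output formatting is an assumption, only the column and JSON key names come from the hunks:

```sql
SHOW CREATE USER cert_user_san;
-- CREATE USER cert_user_san IDENTIFIED WITH ssl_certificate SAN 'DNS:host.domain.com'

SELECT name, auth_type, auth_params FROM system.users WHERE name LIKE 'cert_user%';
-- cert_user_cn   ssl_certificate  {"common_names":["host.domain.com:example_user"]}
-- cert_user_san  ssl_certificate  {"subject_alt_names":["DNS:host.domain.com"]}
```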
diff --git a/src/Access/Common/SSLCertificateSubjects.cpp b/src/Access/Common/SSLCertificateSubjects.cpp
new file mode 100644
index 00000000000..ca7001a31a2
--- /dev/null
+++ b/src/Access/Common/SSLCertificateSubjects.cpp
@@ -0,0 +1,95 @@
+#include <Access/Common/SSLCertificateSubjects.h>
+#include <Common/Exception.h>
+
+#if USE_SSL
+#include <openssl/x509v3.h>
+#endif
+
+namespace DB
+{
+namespace ErrorCodes
+{
+    extern const int BAD_ARGUMENTS;
+}
+
+#if USE_SSL
+SSLCertificateSubjects extractSSLCertificateSubjects(const Poco::Net::X509Certificate & certificate)
+{
+
+    SSLCertificateSubjects subjects;
+    if (!certificate.commonName().empty())
+    {
+        subjects.insert(SSLCertificateSubjects::Type::CN, certificate.commonName());
+    }
+
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wused-but-marked-unused"
+    auto stackof_general_name_deleter = [](void * ptr) { GENERAL_NAMES_free(static_cast<GENERAL_NAMES *>(ptr)); };
+    std::unique_ptr<void, decltype(stackof_general_name_deleter)> cert_names(
+        X509_get_ext_d2i(const_cast<X509 *>(certificate.certificate()), NID_subject_alt_name, nullptr, nullptr),
+        stackof_general_name_deleter);
+
+    if (STACK_OF(GENERAL_NAME) * names = static_cast<GENERAL_NAMES *>(cert_names.get()))
+    {
+        for (int i = 0; i < sk_GENERAL_NAME_num(names); ++i)
+        {
+            const GENERAL_NAME * name = sk_GENERAL_NAME_value(names, i);
+            if (name->type == GEN_DNS || name->type == GEN_URI)
+            {
+                const char * data = reinterpret_cast<const char *>(ASN1_STRING_get0_data(name->d.ia5));
+                std::size_t len = ASN1_STRING_length(name->d.ia5);
+                std::string subject = (name->type == GEN_DNS ? "DNS:" : "URI:") + std::string(data, len);
+                subjects.insert(SSLCertificateSubjects::Type::SAN, std::move(subject));
+            }
+        }
+    }
+
+#pragma clang diagnostic pop
+    return subjects;
+}
+#endif
+
+
+void SSLCertificateSubjects::insert(const String & subject_type_, String && subject)
+{
+    insert(parseSSLCertificateSubjectType(subject_type_), std::move(subject));
+}
+
+void SSLCertificateSubjects::insert(Type subject_type_, String && subject)
+{
+    subjects[static_cast<size_t>(subject_type_)].insert(std::move(subject));
+}
+
+SSLCertificateSubjects::Type parseSSLCertificateSubjectType(const String & type_)
+{
+    if (type_ == "CN")
+        return SSLCertificateSubjects::Type::CN;
+    if (type_ == "SAN")
+        return SSLCertificateSubjects::Type::SAN;
+
+    throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown SSL Certificate Subject Type: {}", type_);
+}
+
+String toString(SSLCertificateSubjects::Type type_)
+{
+    switch (type_)
+    {
+        case SSLCertificateSubjects::Type::CN:
+            return "CN";
+        case SSLCertificateSubjects::Type::SAN:
+            return "SAN";
+    }
+}
+
+bool operator==(const SSLCertificateSubjects & lhs, const SSLCertificateSubjects & rhs)
+{
+    for (SSLCertificateSubjects::Type type : {SSLCertificateSubjects::Type::CN, SSLCertificateSubjects::Type::SAN})
+    {
+        if (lhs.at(type) != rhs.at(type))
+            return false;
+    }
+    return true;
+}
+
+}
+
diff --git a/src/Access/Common/SSLCertificateSubjects.h b/src/Access/Common/SSLCertificateSubjects.h
new file mode 100644
index 00000000000..ec11714d48a
--- /dev/null
+++ b/src/Access/Common/SSLCertificateSubjects.h
@@ -0,0 +1,48 @@
+#pragma once
+
+#include "config.h"
+#include <base/types.h>
+#include <boost/container/flat_set.hpp>
+
+#if USE_SSL
+#    include <Poco/Net/X509Certificate.h>
+#endif
+
+namespace DB
+{
+class SSLCertificateSubjects
+{
+public:
+    using container = boost::container::flat_set<String>;
+    enum class Type
+    {
+        CN,
+        SAN
+    };
+
+private:
+    std::array<container, 2> subjects;
+
+public:
+    inline const container & at(Type type_) const { return subjects[static_cast<size_t>(type_)]; }
+    inline bool empty()
+    {
+        for (auto & subject_list : subjects)
+        {
+            if (!subject_list.empty())
+                return false;
+        }
+        return true;
+    }
+    void insert(const String & subject_type_, String && subject);
+    void insert(Type type_, String && subject);
+    friend bool operator==(const SSLCertificateSubjects & lhs, 
const SSLCertificateSubjects & rhs); +}; + +String toString(SSLCertificateSubjects::Type type_); +SSLCertificateSubjects::Type parseSSLCertificateSubjectType(const String & type_); + +#if USE_SSL +SSLCertificateSubjects extractSSLCertificateSubjects(const Poco::Net::X509Certificate & certificate); +#endif +} diff --git a/src/Access/Credentials.cpp b/src/Access/Credentials.cpp index f9886c0182b..f01700b6e46 100644 --- a/src/Access/Credentials.cpp +++ b/src/Access/Credentials.cpp @@ -1,7 +1,7 @@ #include +#include #include - namespace DB { @@ -48,18 +48,18 @@ void AlwaysAllowCredentials::setUserName(const String & user_name_) user_name = user_name_; } -SSLCertificateCredentials::SSLCertificateCredentials(const String & user_name_, const String & common_name_) +SSLCertificateCredentials::SSLCertificateCredentials(const String & user_name_, SSLCertificateSubjects && subjects_) : Credentials(user_name_) - , common_name(common_name_) + , certificate_subjects(subjects_) { is_ready = true; } -const String & SSLCertificateCredentials::getCommonName() const +const SSLCertificateSubjects & SSLCertificateCredentials::getSSLCertificateSubjects() const { if (!isReady()) throwNotReady(); - return common_name; + return certificate_subjects; } BasicCredentials::BasicCredentials() diff --git a/src/Access/Credentials.h b/src/Access/Credentials.h index d04f8a66541..5f6b0269eef 100644 --- a/src/Access/Credentials.h +++ b/src/Access/Credentials.h @@ -1,6 +1,8 @@ #pragma once #include +#include +#include #include #include "config.h" @@ -42,11 +44,11 @@ class SSLCertificateCredentials : public Credentials { public: - explicit SSLCertificateCredentials(const String & user_name_, const String & common_name_); - const String & getCommonName() const; + explicit SSLCertificateCredentials(const String & user_name_, SSLCertificateSubjects && subjects_); + const SSLCertificateSubjects & getSSLCertificateSubjects() const; private: - String common_name; + SSLCertificateSubjects certificate_subjects; }; class BasicCredentials diff --git a/src/Access/UsersConfigAccessStorage.cpp b/src/Access/UsersConfigAccessStorage.cpp index 1f9a977bab6..9c4fe2c80c1 100644 --- a/src/Access/UsersConfigAccessStorage.cpp +++ b/src/Access/UsersConfigAccessStorage.cpp @@ -1,4 +1,5 @@ #include +#include #include #include #include @@ -194,18 +195,23 @@ namespace /// Fill list of allowed certificates. Poco::Util::AbstractConfiguration::Keys keys; config.keys(certificates_config, keys); - boost::container::flat_set common_names; for (const String & key : keys) { if (key.starts_with("common_name")) { String value = config.getString(certificates_config + "." + key); - common_names.insert(std::move(value)); + user->auth_data.addSSLCertificateSubject(SSLCertificateSubjects::Type::CN, std::move(value)); + } + else if (key.starts_with("subject_alt_name")) + { + String value = config.getString(certificates_config + "." 
+ key); + if (value.empty()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected ssl_certificates.subject_alt_name to not be empty"); + user->auth_data.addSSLCertificateSubject(SSLCertificateSubjects::Type::SAN, std::move(value)); } else throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown certificate pattern type: {}", key); } - user->auth_data.setSSLCertificateCommonNames(std::move(common_names)); } else if (has_ssh_keys) { diff --git a/src/Parsers/Access/ASTAuthenticationData.cpp b/src/Parsers/Access/ASTAuthenticationData.cpp index 3a62480dc0c..e6e3499493f 100644 --- a/src/Parsers/Access/ASTAuthenticationData.cpp +++ b/src/Parsers/Access/ASTAuthenticationData.cpp @@ -106,7 +106,7 @@ void ASTAuthenticationData::formatImpl(const FormatSettings & settings, FormatSt } case AuthenticationType::SSL_CERTIFICATE: { - prefix = "CN"; + prefix = ssl_cert_subject_type.value(); parameters = true; break; } diff --git a/src/Parsers/Access/ASTAuthenticationData.h b/src/Parsers/Access/ASTAuthenticationData.h index de166bdf234..7f0644b3437 100644 --- a/src/Parsers/Access/ASTAuthenticationData.h +++ b/src/Parsers/Access/ASTAuthenticationData.h @@ -33,6 +33,7 @@ public: std::optional getPassword() const; std::optional getSalt() const; + std::optional ssl_cert_subject_type; /// CN or SubjectAltName /// If type is empty we use the default password type. /// AuthenticationType::NO_PASSWORD is specified explicitly. diff --git a/src/Parsers/Access/ASTCreateUserQuery.cpp b/src/Parsers/Access/ASTCreateUserQuery.cpp index 02735568a04..6f0ccc76797 100644 --- a/src/Parsers/Access/ASTCreateUserQuery.cpp +++ b/src/Parsers/Access/ASTCreateUserQuery.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include diff --git a/src/Parsers/Access/ParserCreateUserQuery.cpp b/src/Parsers/Access/ParserCreateUserQuery.cpp index d4729ab796a..d4a8813e9e4 100644 --- a/src/Parsers/Access/ParserCreateUserQuery.cpp +++ b/src/Parsers/Access/ParserCreateUserQuery.cpp @@ -20,7 +20,6 @@ #include #include #include - #include "config.h" namespace DB @@ -65,7 +64,7 @@ namespace bool expect_hash = false; bool expect_ldap_server_name = false; bool expect_kerberos_realm = false; - bool expect_common_names = false; + bool expect_ssl_cert_subjects = false; bool expect_public_ssh_key = false; bool expect_http_auth_server = false; @@ -82,7 +81,7 @@ namespace else if (check_type == AuthenticationType::KERBEROS) expect_kerberos_realm = true; else if (check_type == AuthenticationType::SSL_CERTIFICATE) - expect_common_names = true; + expect_ssl_cert_subjects = true; else if (check_type == AuthenticationType::SSH_KEY) expect_public_ssh_key = true; else if (check_type == AuthenticationType::HTTP) @@ -122,9 +121,10 @@ namespace ASTPtr value; ASTPtr parsed_salt; - ASTPtr common_names; ASTPtr public_ssh_keys; ASTPtr http_auth_scheme; + ASTPtr ssl_cert_subjects; + std::optional ssl_cert_subject_type; if (expect_password || expect_hash) { @@ -153,12 +153,19 @@ namespace return false; } } - else if (expect_common_names) + else if (expect_ssl_cert_subjects) { - if (!ParserKeyword{Keyword::CN}.ignore(pos, expected)) + for (const Keyword &keyword : {Keyword::CN, Keyword::SAN}) + if (ParserKeyword{keyword}.ignore(pos, expected)) + { + ssl_cert_subject_type = toStringView(keyword); + break; + } + + if (!ssl_cert_subject_type) return false; - if (!ParserList{std::make_unique(), std::make_unique(TokenType::Comma), false}.parse(pos, common_names, expected)) + if (!ParserList{std::make_unique(), std::make_unique(TokenType::Comma), false}.parse(pos, 
ssl_cert_subjects, expected)) return false; } else if (expect_public_ssh_key) @@ -166,7 +173,7 @@ namespace if (!ParserKeyword{Keyword::BY}.ignore(pos, expected)) return false; - if (!ParserList{std::make_unique(), std::make_unique(TokenType::Comma), false}.parse(pos, common_names, expected)) + if (!ParserList{std::make_unique(), std::make_unique(TokenType::Comma), false}.parse(pos, public_ssh_keys, expected)) return false; } else if (expect_http_auth_server) @@ -195,8 +202,11 @@ namespace if (parsed_salt) auth_data->children.push_back(std::move(parsed_salt)); - if (common_names) - auth_data->children = std::move(common_names->children); + if (ssl_cert_subjects) + { + auth_data->ssl_cert_subject_type = ssl_cert_subject_type.value(); + auth_data->children = std::move(ssl_cert_subjects->children); + } if (public_ssh_keys) auth_data->children = std::move(public_ssh_keys->children); diff --git a/src/Parsers/CommonParsers.h b/src/Parsers/CommonParsers.h index f0cbe42da80..ab1dc4a67f4 100644 --- a/src/Parsers/CommonParsers.h +++ b/src/Parsers/CommonParsers.h @@ -417,6 +417,7 @@ namespace DB MR_MACROS(SALT, "SALT") \ MR_MACROS(SAMPLE_BY, "SAMPLE BY") \ MR_MACROS(SAMPLE, "SAMPLE") \ + MR_MACROS(SAN, "SAN") \ MR_MACROS(SCHEME, "SCHEME") \ MR_MACROS(SECOND, "SECOND") \ MR_MACROS(SECONDS, "SECONDS") \ diff --git a/src/Server/HTTPHandler.cpp b/src/Server/HTTPHandler.cpp index 02d0959ff50..6269f98f314 100644 --- a/src/Server/HTTPHandler.cpp +++ b/src/Server/HTTPHandler.cpp @@ -38,7 +38,9 @@ #include #include #include +#include +#include #include "config.h" #include @@ -377,7 +379,7 @@ bool HTTPHandler::authenticateUser( bool has_credentials_in_query_params = params.has("user") || params.has("password"); std::string spnego_challenge; - std::string certificate_common_name; + SSLCertificateSubjects certificate_subjects; if (has_auth_headers) { @@ -400,11 +402,11 @@ bool HTTPHandler::authenticateUser( "to use SSL certificate authentication and authentication via password simultaneously"); if (request.havePeerCertificate()) - certificate_common_name = request.peerCertificate().commonName(); + certificate_subjects = extractSSLCertificateSubjects(request.peerCertificate()); - if (certificate_common_name.empty()) + if (certificate_subjects.empty()) throw Exception(ErrorCodes::AUTHENTICATION_FAILED, - "Invalid authentication: SSL certificate authentication requires nonempty certificate's Common Name"); + "Invalid authentication: SSL certificate authentication requires nonempty certificate's Common Name or Subject Alternative Name"); #else throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSL certificate authentication disabled because ClickHouse was built without SSL library"); @@ -448,10 +450,10 @@ bool HTTPHandler::authenticateUser( password = params.get("password", ""); } - if (!certificate_common_name.empty()) + if (!certificate_subjects.empty()) { if (!request_credentials) - request_credentials = std::make_unique(user, certificate_common_name); + request_credentials = std::make_unique(user, std::move(certificate_subjects)); auto * certificate_credentials = dynamic_cast(request_credentials.get()); if (!certificate_credentials) diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index e3a820340ad..0af3b6f8912 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -1479,7 +1479,7 @@ void TCPHandler::receiveHello() try { session->authenticate( - SSLCertificateCredentials{user, secure_socket.peerCertificate().commonName()}, + SSLCertificateCredentials{user, 
extractSSLCertificateSubjects(secure_socket.peerCertificate())}, getClientAddress(client_info)); return; } diff --git a/src/Storages/System/StorageSystemUsers.cpp b/src/Storages/System/StorageSystemUsers.cpp index 0c34f04844d..541bd852140 100644 --- a/src/Storages/System/StorageSystemUsers.cpp +++ b/src/Storages/System/StorageSystemUsers.cpp @@ -17,6 +17,9 @@ #include #include #include +#include +#include "base/types.h" +#include #include @@ -142,10 +145,19 @@ void StorageSystemUsers::fillData(MutableColumns & res_columns, ContextPtr conte } else if (auth_data.getType() == AuthenticationType::SSL_CERTIFICATE) { - Poco::JSON::Array::Ptr arr = new Poco::JSON::Array(); - for (const auto & common_name : auth_data.getSSLCertificateCommonNames()) - arr->add(common_name); - auth_params_json.set("common_names", arr); + Poco::JSON::Array::Ptr common_names = new Poco::JSON::Array(); + Poco::JSON::Array::Ptr subject_alt_names = new Poco::JSON::Array(); + + const auto & subjects = auth_data.getSSLCertificateSubjects(); + for (const String & subject : subjects.at(SSLCertificateSubjects::Type::CN)) + common_names->add(subject); + for (const String & subject : subjects.at(SSLCertificateSubjects::Type::SAN)) + subject_alt_names->add(subject); + + if (common_names->size() > 0) + auth_params_json.set("common_names", common_names); + if (subject_alt_names->size() > 0) + auth_params_json.set("subject_alt_names", subject_alt_names); } std::ostringstream oss; // STYLE_CHECK_ALLOW_STD_STRING_STREAM diff --git a/tests/integration/test_ssl_cert_authentication/certs/ca-cert.pem b/tests/integration/test_ssl_cert_authentication/certs/ca-cert.pem index a64cd623750..d1e4a3a88d9 100644 --- a/tests/integration/test_ssl_cert_authentication/certs/ca-cert.pem +++ b/tests/integration/test_ssl_cert_authentication/certs/ca-cert.pem @@ -1,32 +1,32 @@ -----BEGIN CERTIFICATE----- -MIIFhTCCA22gAwIBAgIUQOHzlr+pa+RiBlRROQnQgfkDRUMwDQYJKoZIhvcNAQEL +MIIFhTCCA22gAwIBAgIUZmPYBB6vdp8uxKlJcS8mI0SArqQwDQYJKoZIhvcNAQEL BQAwUjELMAkGA1UEBhMCUlUxEzARBgNVBAgMClNvbWUtU3RhdGUxITAfBgNVBAoM -GEludGVybmV0IFdpZGdpdHMgUHR5IEx0ZDELMAkGA1UEAwwCY2EwHhcNMjIwODA4 -MTcwNTQwWhcNMzIwODA1MTcwNTQwWjBSMQswCQYDVQQGEwJSVTETMBEGA1UECAwK +GEludGVybmV0IFdpZGdpdHMgUHR5IEx0ZDELMAkGA1UEAwwCY2EwHhcNMjQwNjI2 +MTAyNTAwWhcNMzQwNjI0MTAyNTAwWjBSMQswCQYDVQQGEwJSVTETMBEGA1UECAwK U29tZS1TdGF0ZTEhMB8GA1UECgwYSW50ZXJuZXQgV2lkZ2l0cyBQdHkgTHRkMQsw -CQYDVQQDDAJjYTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBALgd5Tta -+M/XAGMhdi44sKEp7f81kTwqUib4mkHj9Nyp0MKMo9KodV+t60Fej/L0D9Fm1N3V -Q+CC4numWWyaBK6hFdfkU5wvbzZnBx8KozMoeiPLPes6QOl9VWkq3lpBDLAcEZM6 -LMsvNy1jsUdQ06lLy6j1lH9dgIe77qNf9UxZhtCJ4ZslogI8oY3q6D/UI1oCpx5L -J42OyOoCGbTFwzwtHhMw5WKI0EHXEOWwXubdM7P3ETxTjp2+vYPSXj5Irq5XVyfd -tkCJ7GWouB9x5vYa9Y6DC0J/cgOsdrLBbvLGMTubbFjO87musc1DCb9Svpu/IQDP -PawdFTn4ASPny3vt0TMRa/O8nBkrIW84O820ddXZhBb0tNT/q+ftffec2rOas7U4 -kE7YzbQthk9otBqvPX+VeCjFJ5Kx3KTOiPGc/eyWHoJbjvwNrnFuEWIiN/1TqhCc -Y3Qq8cud601yu3dBtCrNAEDCclfnRO0wdKNgkP3u2/lWY4xvMyJfxhNsb/R4R8Z6 -VVYQ4vJXoON0GGYs9D2KURlo+bMIz9hLtZLshK+voOetmRhUmYKa1gE3UxWLmJQM -/p8A7Zihr4OMv5ioH/hnXvVcSJj9VcsPMF3Z/RhllvOxN4TBLyZ0hW55oEz3B4Bn -IbA8LcRJUwfQTKtrVHyt07NLbQm0Kp7YYO8FAgMBAAGjUzBRMB0GA1UdDgQWBBRP -/aq+8kGTfMRAmPgrNaaEHkdKEzAfBgNVHSMEGDAWgBRP/aq+8kGTfMRAmPgrNaaE -HkdKEzAPBgNVHRMBAf8EBTADAQH/MA0GCSqGSIb3DQEBCwUAA4ICAQCycqlIfQMF -bxjczVV+ootUWeiD9Qo8w3VosQtR6PYxDkFZGgP2/aD3wIRNSBpmRfnHCdv1sURB -XkKpqG3E22NRDLHwjmXjI8y5BkUxX1xwL9WX5pe9yZTn3eZu6lPtXrhcPnuMvsxs -X+aP0Y30iMEpHTxLlvV4yNso7ZucXaunWfQTKV18FuXROPgwwnbcZscCQhTxOA+d 
-JrB+8WD3BwwzmJU8Whmojmt4pYhzS7q47OIfkxIGugtZbtSxAWXBMGrV0KJkipr+ -omYUV1qCbYXZgh+h2+JtNdBpIZ3K+i3esXT14N9e72oysmoYFQN4Qol8lMgZ3Jdr -cTH3m1zg1fOne7KT30XKyfTr4iYoph9WHrcv1XwxwYzPbMI+HdMJX2M30V3HEMRY -RLoafxUZNkFdpWcx29Dok1AI/aVU4vE4+32YdctSJNVPrT/V+Y3dX8skt8tgrnbg -JnrFCpEuVhkNiwgTS6ktH2ecdpY2VqesUENJtw+m85cCBjxd8XYhRNoFBPQp8SAv -hEeGc+hIjXYffy6AUo9p+45uOU+RBPKH4hSleESkrI7duajEXaPPl/wJeQYhqvWp -imbAJtqwI6aCf78NOlbzWiTWJt3g+4kla6oJTInGAdYHcOwwY1KwMWKtSO2ARHjM -wCCDUCIbtJgxTrUk1Kgty5ipLgP64m29Pw== +CQYDVQQDDAJjYTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBALHzp+SR +T8EtrMBYNlgwIUGMZaXomQPwsjOpjt3RUsdE3LCQ15rLuyBwZ0SbLMDOazCDA7Xr ++AXHDYwg/PCJhe2N4NTzfgnWqbkIGwYLhys95Xxq+q+kV7Csk/27JSk1rsK3Nru/ +Iuj/NWgBkAQC8n10qfbJSXHcXm9wDVy/L8t7DXwxkX70LToGWsb6QW6VkiWe/QOW +QYTMVncqjrtLTCTnqGIk0s6ZIifgPLfaYJxtwK+LdIoJioSbAuVfHmVTe10vjonO +YqJ56KfqOn/G7dIf1l9gOLFBAe4jUf3RS1wkQMdk+oEGLbGnShgW+BQMfB48RkrQ +486h7slzwC29jBJLRARI2Oc9p8/zBDVph0pxkjVGka8dfIkZbmTD932h/1gfMgQl +F20G/H5FF1jk37bDcsczns0c24S1F2uJbzOlHjFLhqmH1IaVCWsYawfBs9khModW +VS6+WAv//cqWE3KmmJ2EdtAmzMCJzAQUEyrMZWmrFrBzpACyMq2zFEtyIXsCXpgq +eW4odxIIZlClibo1FGrflqN+hXnAhRyCj7WJBQ0ZPrEdRMRpBYhYdmygPJ+lWfsg +HOtNnshSuJTXGJTtsuJVr4Ioeq6tKfWGofRu4vvT6cILZjbp9VaxxfVLS7bTsjWR +c+5xHp+KbcjSfw2cJHQN85hlWTpMe9jPhgt/AgMBAAGjUzBRMB0GA1UdDgQWBBSJ +Kj1dSYN0jW+Mu0xFQrobZFmQpTAfBgNVHSMEGDAWgBSJKj1dSYN0jW+Mu0xFQrob +ZFmQpTAPBgNVHRMBAf8EBTADAQH/MA0GCSqGSIb3DQEBCwUAA4ICAQCEm/epjNWe +w0A+1M3i0y8adrXWdTeRXuP3Klgtjh3NGFKNm9WGnOAufsqUIpVwKLw1uYNIhiEA +fj5KuD7YIVU9CrWx4Z/+lILUZhyn/br5mACtRiRTF/75/QwLcDz5z7K9MyMzdL99 +DLQ9bd3JCuEgVj6zacPrALwWVhM8u9fNGJxdQANnIC8yTY5+ZE59/fn7UI787JuR +4njOGWSVnDavbTyJCPMPiUkgwqL+QSWBcNbGAPzMaAblvc1SL2Lj/ikFDAETAZs2 +T/3ZqBqHEOuVhFQYTAvMAdMQX3w8bYv/CGL8++W+qHazY+uqPypd9CLnICbnkZmr +P+id9WleGl2F//u1CQ+YA2Q3EazSFhwRLA7IKIVCrYVaBsbe/bpxxZb6+AQVfM/i ++7+fCbr7A5HDe9Fi4dClv6xPI0GZZkarhQPsoLPaDQeqM4OE+K6oPSHJnqfAB8v3 +NgTt1QuVnKhwveX5DDEP4t/Qt4j2n7AFpeoZDEA8aM33K0noXNrwqHz3991O1RWz +t/gd+cFG/Z1jRP8kYtfAV8go2nzt8QvqBhfIcNnMwD8cwuKJ5G7SdqLvDFj3XCCO +YqQAALl4QFs046eVDEWLajSESmj4fCaTmO05sHHLy7U5asoAo/MWGbmGmL+8ExUX +sPO9r12lPJ7IThJ13PSqbJIJnnloL/XCxA== -----END CERTIFICATE----- diff --git a/tests/integration/test_ssl_cert_authentication/certs/ca-cert.srl b/tests/integration/test_ssl_cert_authentication/certs/ca-cert.srl index c02cd0a4526..cf47b0dc79c 100644 --- a/tests/integration/test_ssl_cert_authentication/certs/ca-cert.srl +++ b/tests/integration/test_ssl_cert_authentication/certs/ca-cert.srl @@ -1 +1 @@ -05F10C67567FE30795D77AF2540F6AC8D4CF2461 +05F10C67567FE30795D77AF2540F6AC8D4CF2470 diff --git a/tests/integration/test_ssl_cert_authentication/certs/ca-key.pem b/tests/integration/test_ssl_cert_authentication/certs/ca-key.pem index 26616a084fb..2dea2ccd837 100644 --- a/tests/integration/test_ssl_cert_authentication/certs/ca-key.pem +++ b/tests/integration/test_ssl_cert_authentication/certs/ca-key.pem @@ -1,52 +1,52 @@ -----BEGIN PRIVATE KEY----- -MIIJQwIBADANBgkqhkiG9w0BAQEFAASCCS0wggkpAgEAAoICAQC4HeU7WvjP1wBj -IXYuOLChKe3/NZE8KlIm+JpB4/TcqdDCjKPSqHVfretBXo/y9A/RZtTd1UPgguJ7 -pllsmgSuoRXX5FOcL282ZwcfCqMzKHojyz3rOkDpfVVpKt5aQQywHBGTOizLLzct -Y7FHUNOpS8uo9ZR/XYCHu+6jX/VMWYbQieGbJaICPKGN6ug/1CNaAqceSyeNjsjq -Ahm0xcM8LR4TMOViiNBB1xDlsF7m3TOz9xE8U46dvr2D0l4+SK6uV1cn3bZAiexl -qLgfceb2GvWOgwtCf3IDrHaywW7yxjE7m2xYzvO5rrHNQwm/Ur6bvyEAzz2sHRU5 -+AEj58t77dEzEWvzvJwZKyFvODvNtHXV2YQW9LTU/6vn7X33nNqzmrO1OJBO2M20 -LYZPaLQarz1/lXgoxSeSsdykzojxnP3slh6CW478Da5xbhFiIjf9U6oQnGN0KvHL -netNcrt3QbQqzQBAwnJX50TtMHSjYJD97tv5VmOMbzMiX8YTbG/0eEfGelVWEOLy 
-V6DjdBhmLPQ9ilEZaPmzCM/YS7WS7ISvr6DnrZkYVJmCmtYBN1MVi5iUDP6fAO2Y -oa+DjL+YqB/4Z171XEiY/VXLDzBd2f0YZZbzsTeEwS8mdIVueaBM9weAZyGwPC3E -SVMH0Eyra1R8rdOzS20JtCqe2GDvBQIDAQABAoICACUNdyVz0GDbbuuEP2lb5Uf5 -zTiwChNmnPPU0g5ylsOVtTqB3TI2zrA7qv2TzzpgnZJRtIMUOT3EeVnTB2oQsOKH -7oJtI9ppvWLgZxF7LY7NFY+AhmyBY7DZlUe95dOaBbinXW4Ypg0Z6mvLBVFJ1d7b -LxhIm+Lp6UTnsEXWuOHtnnD3Xpy/fRQgKY8K+ERoJ9kkcYdqLoKReqOFWrEDn5/L -oKLSE0e6KtclNfRTgzXIzG/qR+CxHCFo0nDl9CtqqyTQ1Oxj9RqV5yoqQyuo24cp -s8NDYg+24DYXDig5P1/CPQQ26h4RQAFdQ9EALcUAHPXxB+5Gxq2NiMkKvQgBzPRC -92yD8IDarjwizeP5hKCOVCAyspHxtOFgMS8LpHQ4ul++sOPSYV8pL+v0dOhs0aky -96PZyZWsvnN/CVNXuuTAi0RvyUDTGhxt4mc9TUe8+XqFC9P1BHaCyg+jxbEjJCWR -YnJfBiL0auXSL6R/RZ+LSRQtS5RUMNKESYNHFYVik/mQHZoy/1W4pOoe25fnQ8f9 -/80IIzsGq7RDVHgsbPCKgwN7dpDMhTmWcjrC31+oOTWFmIkDkp+dS37ErO6f9kLF -e5syBKTvbV2yscKCTWXkoE9kgCk0TVlwysWKUQtRV3JKsy//YLH/PfjfH/PdWBvv -CXRmZJTH7+Ua5RIEGoT9AoIBAQDD+GMnfYkSOPZPNv6BA3AW7PvX3lwCE5eRorsD -G1hBfYbh582/yG7pRvSTxdxH400jwCU7Et2wlxj4ClZxOyBoeaOi+8wxZB4xFpGG -e7OhGaQJ2Vd4LWqjdb178Qk2a4q8oOsciHoQQzJhTZhd+iiJLpnTIDTR13OVJKeF -Xi4FFk3GPOq7TpugMdQXZ1XB7rh5t9lDjeYRVj0fDU5amBONPKcVYO8lIZElsZjQ -EzfxAeYEJxN+SaNkUUGew3QDZI/caF0nji4WcuEH2LracshoQaCl5MLaPJpMJIOv -aCa442AO1vpRWNyk9dZDjrm6+MjkR1gYPTSaaNll3lmqr2AvAoIBAQDwg/sJOFWa -Hq4lMD5MuX/u7We5HcT1QOzlVsXBEiGiUNcQUuQ+leIZRqJSosEpgnuVS33sbc4r -fDngZJLItwezQ7A/1LqzJVgHb0Qp4Sq0ak905ibjpRd19y9E3tx47KqNzziYcp0M -8t+gOBH5tDWHk8fjcujMuwRaQoHoJvDVl7jSoPhYVh8BIS1lKvYz5/hk/2yG43AF -MIjbUvC4b3yMiPXmvoUKjZNrAM/5f3E4A7SW1aCLEJ1Humepd3pLUrZsma6qq9x8 -lb2MQeW3UG/Yrfo1NquocX9KjHawGe3vTrPWoi0FQMYVHPKQB0oeLd/dkveYOl8B -DPEcUEBDrMMLAoIBAQC0fvCEjOFVAVYLu/FJTqtM0lVbjBheaUEvCB0LTYJKP52e -MGyW+br97TaTbKfpJngEN5OTB1tcrK54trzLadP9phuLoDOAQmB0gf7jSMg312/5 -Ck/ABsSzbVxq7aS4lPChr+0cyK70j4+1g0yVMjVntJ5FkOJVbAWvFqWc9xOX5+UW -c0EX59z6/SoFrE2WY5NRuH3J9MY0raN6GD1Uv8EyUuzpifB52KLhfuPqsejSeBct -N/iCVrzBRv4Thp4yCctfDFjQd+oHDyQon/vp+9KOA/Q9F6hIbY0IGJGZW2pe3D27 -LVI83oXLAgqpCqoShdYyO2vuV2E93thtNRCNXx5NAoIBAE3/Lvi1eSUqz+SleX6e -JXvRJ3Lj0YyLSoexFR5gh+HAf43+o6AcMR9rRZawyx1wAC3RNnvmvBZkCczYMLXA -jVG4IL8CeK1B7gMzNRKzv4qUc0IApRnr3ujViVG3SB46+bBVRBBEEuQxGw41QLcO -ltFpvkfnatGB0I6IxOIJRs6tjjVYGFFlVFakcLk/Lmp0zpvKLWKs/RXhwHAHvKLD -HMviWoRCwphCg00PDWLmzkuRAA5uJssSTz8Elztg0Jr+rsQXLoqQg0cvtDF46UsC -XdMR0HNTUGWmsNX3KUJAlmWlyzJOk3UBpXsRUSQeCQ4yaEfNsld+jnKjxMkeyUhp -DTMCggEBAJs4Ng8C7Pp3q/cP8/lRaei89NueSINNXKKxzuKNg6iy6w9kOqPZef6+ -MoAe14I0UX3051mUcz1RqJU34iOC1p2CjjfM7eT8Ue9TULHeMqc1wj6xseSqZYWM -3R41p96lum179pC+hRGRl2l418xx1/bo079sB8yS/qsUiDyBIBkASgiulXkYDBER -T7L99aqXHrBlEu1S1THcYt8GOqOUuYZ7+RK9i9+Irk58zN/Pkucj0QMHN4WVInYA -kepOlUn0KlUlyn8g9Ii7gZKHFKbv4F5QIdXE5tXX88sRtqHG7GYMFa7kZMGG6bIX -7MqACWG/2mFExJnaXtgA2HS8NYPKUcQ= +MIIJRAIBADANBgkqhkiG9w0BAQEFAASCCS4wggkqAgEAAoICAQCx86fkkU/BLazA +WDZYMCFBjGWl6JkD8LIzqY7d0VLHRNywkNeay7sgcGdEmyzAzmswgwO16/gFxw2M +IPzwiYXtjeDU834J1qm5CBsGC4crPeV8avqvpFewrJP9uyUpNa7Ctza7vyLo/zVo +AZAEAvJ9dKn2yUlx3F5vcA1cvy/Lew18MZF+9C06BlrG+kFulZIlnv0DlkGEzFZ3 +Ko67S0wk56hiJNLOmSIn4Dy32mCcbcCvi3SKCYqEmwLlXx5lU3tdL46JzmKieein +6jp/xu3SH9ZfYDixQQHuI1H90UtcJEDHZPqBBi2xp0oYFvgUDHwePEZK0OPOoe7J +c8AtvYwSS0QESNjnPafP8wQ1aYdKcZI1RpGvHXyJGW5kw/d9of9YHzIEJRdtBvx+ +RRdY5N+2w3LHM57NHNuEtRdriW8zpR4xS4aph9SGlQlrGGsHwbPZITKHVlUuvlgL +//3KlhNyppidhHbQJszAicwEFBMqzGVpqxawc6QAsjKtsxRLciF7Al6YKnluKHcS +CGZQpYm6NRRq35ajfoV5wIUcgo+1iQUNGT6xHUTEaQWIWHZsoDyfpVn7IBzrTZ7I +UriU1xiU7bLiVa+CKHqurSn1hqH0buL70+nCC2Y26fVWscX1S0u207I1kXPucR6f +im3I0n8NnCR0DfOYZVk6THvYz4YLfwIDAQABAoICABZRI14j5yen7cFVjsMyjgkl +bV4INKBs4DxgaF1jMglxlmfCUnfEUxx3XEwbVdp8SK8VzzJSfJFk7EsFnBMifBxV 
+rbunKchcFn7xCEqSyYnfwlb/J589cg3jJtAsVzW62MbsqT2Uc/FaiD0Z7RDDuduH +9QTRK5fO9jzthY97HqhbL07C/Kc6Qi3DvEC2A9y1f1WegcagYmkgIzvgp3PPtqXu +M4zTZ2inhcQQeCzqgzE7Bm49hAkHt0p4Ej3n1u0IMjF2lF6t9mq/9TCRzHJX5V1z +xrPBYnrAV1ihL1gwlk3g8grPnCbwOmzMORuaTdRd2HcGQh6B4f/5CPRUwuY5nkY7 +UMcX4rbRCcBDzG8s/NllTRaVC6yvEJPN4B/zOek6DI+tRy4tRj+BZO/1771bindD +nsYAklxbbpTiS7B073b+as3RFZW0xvmoqLyhzRQNW6GqSGj4C0KVigkWQ7Br69b7 +O2++oEurmLqI5Hm3RsJeJJ9obG4vKhPUPdtStF/W0W2TO2i0gpTOm+NeO5uYBRB1 +6OvhJH9dzMi+a0ekCpdQeD2xG4NLzwSe62/Ozz9Oi0rpAqHHhu+SvF+WEepLbkyO +2zx/OYpFK47idBRCAlHLC/9UyXpvw2yU9sleElVuHM41CzMe8Pwj3Qk0YdiHHDzS +Y19XEVHh/riXUufsAHVhAoIBAQDpUe+UJLgAIT6boz+MPLrPIGLeazCySEHEsdnp +jGuAx0M59PIDn5OeXQpMqCIRFDw6zhA/4gji0UegFRPIGA3Qduq+GsjVYRt6SHLC +N/rBRi2xg77yyKOMxv/+nwKFh/3TKIQbUc9EQj63WGBGCHu/EyMV7i9V0j8e7D2u +v/Z23nV/+XqawJXi4u2bHB3M/upOoclKfb3ewZBVouajzZc92kNC8XYfPn10Eofu +Pz7jxDX10IJNmzIYOozO9mlBsds7nFIBXS5wMz3iVg3GEUB05pPEKoEtZGrw474u +0M+gW9d7PV3qYdFgjSogiQf4JrSrOwXJQL/26nyfRX9QVplxAoIBAQDDP+fFT7Zl +eFLvouZu73lr++bV1+LogHgX+GHCSIujBELPyFEAyAeElFKFqnJ/NEOuPLG9X7tL +PDhh9NUijcBTPhVvwbH2/CRBdSX7Yf6RHh5fY+2Ik3hTF81L4bQcf0fgyX4roJY9 +YqpjQWFYGmSk4niCqWd+re/ZrYx/zpF+qgN21v37BAZNOCI+KidFScrq29p8kpLj +MdBWa0m7bhJcv4MPO46s2EJZVdczBU7iK86v5NVrGz7fPVm+tGxEDpwhyfYiu961 +U05XzT+trAaBa88KlSKdmUFq3qDdC4bFb6D+Ra4g+mhqNGtfBYXsavnENZgt0N99 +9M/dgaAEa/vvAoIBAQCm4YcFo9nDpgOc2H/Mc2d+XIC661oyOkJoSHk/dcMyodNw +scUkWFACdjO2ro9nPdzyho7S0n5elSew1UKH3MSMtXGjNY8uJ726kfUa+2UsNmDa +VgwOpPlt6KwTV3I7RhCDprgOvk4MWYF4LAr4LHsuKKbwuaM7tByXpotb4UuMrALI +3Q0XgOX0GTGvvsWF6VJ3mXpbAGL839+3kMN8p8EkaWewivpc0Jp0mgiFnNEDokSi +JFf+4CFNeRtbsJ2KcocHNQDmntpnQA9kQv6pC4/ZzU4lge1RJUDkOVC/NXU8ElSm +fjcdPIfAklduW/TKRgz1aEr0Lo7fMcqfNNsiAD7RAoIBAQCaeplZ13OsXMLhrrU6 +2GXtNeSxFJoG8n4SGQbfvJ4eYGSsGQVd5OVt1BxmfTERy7wwwvytpGx/XioN9rQb +HqQoOFqljU7M5zmYQKPIfQP4tSe6uUlaYbM1qwNXIkBqu5mXFFSrF+dGsiW1Wik2 +l8tBWZ2XY4jrBZtbUqBzDnC3ErSi9f8E92408lDFdnyTqYrOvxviq+VjtCnt9fzk +OnZ0w2FiT/DWeFQmcnBNgcmj0J07NYZVs7zOy6+R3xY50oVdhnkjihjuxfaaKV5U +fmK3SyEIcm5s2rCTaYlE2rXKyENMar0WgojSXp8FE02efBUZVH4O4c+xzFwaGVEN +rpIpAoIBAQDnAmhVy85n9oX6Bi+bXg9aZFa/teiCX33lCjNvNeJ5GljGqO0X6423 +6FVg91BYvnCbHQ0frqyKimVxNO/XYxCnid+Or48Cb9LwD31Wyip3UZABljg5sTb0 +fiNK0pxx6G8x1MsX0u97LogGwd5bMuff2lMi4xpinkz6ydB+fYsmM0UXGKsNkB/d +zR1izlqm87TMeQVi+pVZuOLmg/+hXVgISI2M7TlSoytODmpnSg5SZbaT7ut1IIwm +hJdWMTPHLv0X7NwsvhV4Knu27BJBw4470+okrOLfDuZ9+LZ6JHvYg22MywjCJx9s +MJuwAZJiZb+dQc0uwMkAEPMEfOQ1BI1+ -----END PRIVATE KEY----- diff --git a/tests/integration/test_ssl_cert_authentication/certs/client1-cert.pem b/tests/integration/test_ssl_cert_authentication/certs/client1-cert.pem index 3a953f1b941..068855d96ea 100644 --- a/tests/integration/test_ssl_cert_authentication/certs/client1-cert.pem +++ b/tests/integration/test_ssl_cert_authentication/certs/client1-cert.pem @@ -1,30 +1,30 @@ -----BEGIN CERTIFICATE----- -MIIFMDCCAxgCFC1il5+r7ZqfWcm+w5gJP/pffeI1MA0GCSqGSIb3DQEBCwUAMFIx +MIIFMDCCAxgCFAXxDGdWf+MHldd68lQPasjUzyRtMA0GCSqGSIb3DQEBCwUAMFIx CzAJBgNVBAYTAlJVMRMwEQYDVQQIDApTb21lLVN0YXRlMSEwHwYDVQQKDBhJbnRl -cm5ldCBXaWRnaXRzIFB0eSBMdGQxCzAJBgNVBAMMAmNhMB4XDTIyMDgwODE3MDU0 -OVoXDTMyMDgwNTE3MDU0OVowVzELMAkGA1UEBhMCUlUxEzARBgNVBAgMClNvbWUt +cm5ldCBXaWRnaXRzIFB0eSBMdGQxCzAJBgNVBAMMAmNhMB4XDTI0MDYyNjEwMjUw +NFoXDTM0MDYyNDEwMjUwNFowVzELMAkGA1UEBhMCUlUxEzARBgNVBAgMClNvbWUt U3RhdGUxITAfBgNVBAoMGEludGVybmV0IFdpZGdpdHMgUHR5IEx0ZDEQMA4GA1UE -AwwHY2xpZW50MTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAMGHbRBS -+W5wgB0Nv66ODxB8lU1xCj/4imLZPZcLzPdtL0fR1TS+G8KjGSrxznEph4NbnFR7 -cE6fKtuC/OSSUMcBeRiXZxjW1+uYjGp6HGJYuyaeVKMrhP2W7BD9GuMu1l0k6+6K 
-R8gGCW+09ROH8Uy6rvCI+aGl6pgwabKzYWIu04EgIsdCgj9aRpkyxoTwFdriDJng -bTutXrPCC+LaozYJBD4tnwBzSfpMlP+1rEPc/Mpt4beDyLA9vp15uVYQNaJKopvu -CoflqLE04QhcMdubsUjQSikutz5x/chElyWG8w/5kjAKZmdwJLa/yxg3NsCoPdmS -j4aVwQwK0seY5o6yUpdADc0W2BzQ8veuQoSX/rWsf/lWQe2VG7aEn84TSJoUWwgX -LIRQuFdyjE13w0VdprJo6Z07Yuuo3cJ9dnnmh/LOZL84tLC4o4qSsnb5UjjS7HWC -IDVtsvz61tKyApJZ1IvsrEshj602whrIDTCZ6jMhpBLpZIj8GRxSRKxpC+Nqu5Zp -sovCNWbpYAkBns7svwEZaRfKY1Sm6bbgcZk1VaMKUIPHqUjndVkKjZ7SeptXCf2K -v18xEaXDLerg99IhRzUlNvv/MKrG1Y9ukO2xb7UBvFPzkeiL09MHTR1bZcvowiSO -3IlBvCixjuHur5UtsGX4wszpSrhtaCYqZG37AgMBAAEwDQYJKoZIhvcNAQELBQAD -ggIBAFsyHyCiYGXGB1M/dSqnsXm4t7VtnaAKJxNYGtx4pv23jdgU56QgLpCXU6k/ -tOE0uNLTbIuDPRGPAPNlyazFG7Dk6OuEQZ6rTGpcgvgJMZisLynSUhSC1AO+10F+ -w/84EjzF11SrU/OuOh8UcKdNBQVJUu5MU3BVzoK6h+g5iG95KDAPpk/7yw46WMJJ -HI7PR2H2Xu8/7I9LyRl1kqMvzOEvLX0bLlQw5HZ8H/kEgxSOtUG8BqmQgxHL/EwI -kfYu7X7t/f9oqzi/AlFWtBBQg/SPDPpsf7uyxfcjduCiDtNxjAa7OId40WHrLoCD -5NqU8ssphuCKf9kxFp16SB8tkVjo7wJoWCgr8HHqArdOVpS+RNB3fjwvnDt+JXM6 -Xi6Ui6WBrTp7T/VQS2jMz04BbpVLnJcQXX1ri+zqQfM4KFsroZWTz/+WMdSD/tHS -6vnfDUKFEvN7GdN0hpV94r+YinbC9UTgRC7V6prrao24mU4EjaHjQJ+c6tymNzye -azPSoqJiYhDdFq/txxNp+OusBshz5sAl9yJye5vvvdCsClG/6USWVfcixBw9vhcp -m6LmgZd6Gc4cROHG5kGQNwPG8IHfr9hljGQGxnH2lvcRt8t/hEhP3NX+G4n4ihKx -g63Iv+ZMUHnHLs6qfQo7ll8150IrLTXubEKnH6M70/75j4wq +AwwHY2xpZW50MTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBALaJgDgT +PPqKnBNHrxu/jVwMpfPa9ENvaHbtSi88jQrvu2O+nRV/EoOBb/ol3pQn7RveMyj0 +PV6Io3KzmkMQRcX2Z5H0BNPAM0WijXpzfxWSdS9yZ/BKbpyq3QfHPX9FTAcnnn0S +gBgKUDfSxnQt4tn32kNv09q8nk0H1JXzjxILwKAhllHweJlyVBt1a6AG/yNe9K+9 +atyhYgwuQHv7z0OdbF8NfBWhWODLPkZ4Lj4LktVakYSi5XS5sdTtYXwi8mEDkucZ +1CrlZswIzR44nlmkJVoZssMbGphLzxWw/XiDn2ZtO4QNU1cJYU/WbJiMY+SUjIyr +AsLf2GAbU5bUiluG+XZsYKUjvrzVV6gYHWncXPDVWvjz96PDRsc6M19rsrhMT9N1 +tMgydCPqpxJkweC8/IRt7GZhTlxQNL3ioFsjksPmvm112012XYHd+NiuVIZucY6P +c0dFRWi0VKZUjvLVRbtxWMlSawTk0S/C6sbL2r556GwxJTwkm+EIuK6nGDKg7Kmw +yLFlSyqtXkvUOnnAnIOzEH3VdjUyUniUbfFT4ODs6TLzIkFSSJDN7W4klP6p1Ot1 +ZUkB030FYpFt1r39AfWLPWLjwzKvMWenWaTSpZIRO3h8sXbh6gt7zVZKNMrf8AFJ +uyOnfYaQpUwrxvWvuJdWZETS7lFgoRrJxGDvAgMBAAEwDQYJKoZIhvcNAQELBQAD +ggIBAJ8XAILsGKVXyCBMazuq3k2rup8kkNU/hVg1RuYGmy4bNClNN6havzu/S1SO +/g00+tlXGBlSiPlRXq/p/oZgOI/efqIWSBfcmHuR9STdELfa7zugdFpscgaTOhTq +Ko5o1B81pZKw6wzVfn2JlGxnEy9e+lCC7ptMdFiBqc7SGrapkjCjLReszn1Jctk/ +9lsSvhWZ/7GhvRO/L93X3/ZM51K7VZxEwFnibULApObDZQBFipYdfKlrrVrqtdj2 +M7Plx2Hh+Ivt16Kj/DqRcRLcWVlaM8rp4QAtjn4bDYArFEGGi8ElWFRNjs5ztE12 +f0Iu+yqGmvDn0lHEocNf8fgxHIN1uJ2sYImS11Yn7xHp5FPb7efvYh8Ih6voCaTg +NojHi61q26YIU112A1ylStV4xMKgxt2rqRvmc6UTnWDtzNO9jp3NscQVHtUEJpv2 +Jd+JsDf1c/w42KTwTyOAz5j+D0acRmw1YRsv2BpO5tcly8nvdMX9k7JesdiQL9bx +ik863yOLG0AOMdWtZPVG1BAuiUUlbBS2RRUp3qsP4OuJ+eVKlXFieX+2NuzqyddV +CywaA+R05nutX5R34h3Cm2MmQOERAk9FUeHFX7cZMAXQRcmoBZKtUfKOGUKF0zOT +ZEs7xmHSqAOTx8ufDU26pnbyCxOBYwn1DVX9nYSskMGMSfGU -----END CERTIFICATE----- diff --git a/tests/integration/test_ssl_cert_authentication/certs/client1-key.pem b/tests/integration/test_ssl_cert_authentication/certs/client1-key.pem index 767afd6e2c9..8d9b887b033 100644 --- a/tests/integration/test_ssl_cert_authentication/certs/client1-key.pem +++ b/tests/integration/test_ssl_cert_authentication/certs/client1-key.pem @@ -1,52 +1,52 @@ -----BEGIN PRIVATE KEY----- -MIIJQgIBADANBgkqhkiG9w0BAQEFAASCCSwwggkoAgEAAoICAQDBh20QUvlucIAd -Db+ujg8QfJVNcQo/+Ipi2T2XC8z3bS9H0dU0vhvCoxkq8c5xKYeDW5xUe3BOnyrb -gvzkklDHAXkYl2cY1tfrmIxqehxiWLsmnlSjK4T9luwQ/RrjLtZdJOvuikfIBglv -tPUTh/FMuq7wiPmhpeqYMGmys2FiLtOBICLHQoI/WkaZMsaE8BXa4gyZ4G07rV6z -wgvi2qM2CQQ+LZ8Ac0n6TJT/taxD3PzKbeG3g8iwPb6deblWEDWiSqKb7gqH5aix 
-NOEIXDHbm7FI0EopLrc+cf3IRJclhvMP+ZIwCmZncCS2v8sYNzbAqD3Zko+GlcEM -CtLHmOaOslKXQA3NFtgc0PL3rkKEl/61rH/5VkHtlRu2hJ/OE0iaFFsIFyyEULhX -coxNd8NFXaayaOmdO2LrqN3CfXZ55ofyzmS/OLSwuKOKkrJ2+VI40ux1giA1bbL8 -+tbSsgKSWdSL7KxLIY+tNsIayA0wmeozIaQS6WSI/BkcUkSsaQvjaruWabKLwjVm -6WAJAZ7O7L8BGWkXymNUpum24HGZNVWjClCDx6lI53VZCo2e0nqbVwn9ir9fMRGl -wy3q4PfSIUc1JTb7/zCqxtWPbpDtsW+1AbxT85Hoi9PTB00dW2XL6MIkjtyJQbwo -sY7h7q+VLbBl+MLM6Uq4bWgmKmRt+wIDAQABAoICAF9pe5lFNyk+OMNaMGsIKaAo -s9kyxmlkiSGT1fweesZvo5RGfNVcdsY+b92OyodbAK7/3vJ9yxBQA6qmiTTNeBqs -/L1lg5Qo89n44x1pp43LDjXLcJHjllUJ78euaW+g+1l/pvv7W8lfRI64yez/pDT+ -gtoexefuq1qt36aVLolf6bGrHMpdXWaLhOvprxsCXNwFf03r6h62ak64d/C3dyjg -SCG5jz8DF/FZiUoKkOwCSvUoOFSoGazePBhaV0f5hN/G6SV0NefrD4CSP/HFtmT9 -fOmXm0Bzgi88tvBh9JyxTCtPkUWzEuqjTM52f1QogIt6Rsf6LF2ffkYIHgP+u4e5 -dDFw2RAZg1DFZrZOyIKW+fyDqftdSj5LEN/AKofc0os4qzCXktcl+x8KSxlGnhn5 -i9V3ZwvaV4ivMxWQ7EO1Jftqgy2OBwTiFHkeVQ8eC7676pTx18UmN0L2heybA0Rq -I9mrzYCDYBn0HGYdb0xplPpDuj/g7zLx+Edn6CEoaXGwU1kT7SoNwfVsix8/+k2x -jYee+f2EjbHmBKA7UeXkLcMa1QpjIipMX5gWBYa6ACV+xdDrvoMDjrRx7A2j4FF/ -T/tv2GjRWeqWB6bySVA7l92Lltm3RLsW82GBr7I9a8xScUYLa0AfNeeBmS5AIgCA -5FhQ7ivlZ56VWemfiTXpAoIBAQDNp6syo1svZJIBQiZIQRxh1H080wtYDi5Yan2R -loQbjbEitzXC1+rFNtspHuOc0frv5XBvbxOJwXmEsXcS8sWDQBKadr4RDdPGeMDa -SA0If8aWN2BjcILfY9VjJzbL4DY2Tfua9OmIrGvx0LMfPpl+6LXgZzCgPv/o+39i -Kv/ISHEsJITloVeuMq5Py0k6+KXepYWAvdrLkEfxg0gl7Ds2TZDOs58gZRb0lW9+ -u7VOtN9EFd6KZtaU1HfBXUBM57Jlt+aIBOGom8LB/XX7bBi6puIqZOK4VqGR+29j -wj1lnX7npfy8dZoCEplTvp3AY7pTO1BrkNluOcacmnSTGtfjAoIBAQDw59P0M0/z -82MwpGCDilXgqXY+SWk8JvRY5raAthqWrjiAkuA9hjh02PXmRLhExs9owsptuP5q -CckFOyoILZFA4IexlvPgyW2rPZFyrMpI98YYG6Lyw0aMSyDX/rBKJA9cServSjzc -1eLT9uPP43L3EkWLtgocHnD1lZ0YSkAe2ehTGEiNOnaW5H81csK07LPYdEvV8wup -aFNZdcUMJt/V8/C2nkAQpLsgGMLRKYgw3KX7JmtkQsldjK0j9rl7eVH65LdjBXuH -cRMS1qRJgHl5+Yemr7890aGomhD37RMYUKbd0NCQIn2ISxtWF0pWra8+zg0fhPiP -EGPbeNVVI30JAoIBAQC1B/Vhu/7Wtzb3OJ2uWPJ9A6nC0xxXRRRy7Drg35gnERFu -t8vxWlPliqZdij1enFDCwDu7PBH833zy260v1tka8lnt8rzZEkzrlvxcqbQfWSsc -rF6C6lWqA52hjLFlwla2cusauqCgmPbkhIxI4rgHyR3hDbT2Or7W/hxh8+v5CBvn -ebYq1V3zj9V1lENAUATi+t2MOJPTQYyzApeOGmb9JEZmTiOzRolwf+MHsoClaf5n -VsDxIBmgJW/NnSKvD/4wIDQkY/eojoRgc5dZ3QvfsmvAWdJh5pCPir/BFwko2/0M -OUdDNlp9nJWv7Em9Q4yPG9Vs+rMLnnxA+o3HuId1AoIBACSQWzg2TY1ORKDOYiO0 -7GHj9qFvjPxnQTD5G3wfp5t1J/hD0qsj4w/BGllv1rQBpNtWrVjH+j1n7M3RdAi3 -udMqAQ5wReW5TN7vwlKwbSd0C+n+z9We0+dZQ8vkyScHoBk20uSs1N4DzKC1WVBl -Sj671DhnUdOAv05W/fgA1QiZtExgZCqjU/qFBdW77Fd/kbBpvlTjxcJZpkTuvhCh -GdokY0WkcT7Vcd1mRLNwZU5dPwgGhcg65ss/HcxWl0JpYIr/CeKKo3wkKmvyjg7l -5AoiWHdxN0qPtcScVbT7k6leHGWQWwd1ZK46EBUaBdtwEygqKA2/peY4658VEPQS -JdkCggEAGLjOV5XjLMliu8tWrOYE/nBZT00KOd4er1nojcxrRd5q2MDbqMqSi5xV -hibl8Egasdx+Sc8+HLazGPFizNrxd6Ahs3tBdUpYClk4dqnV1X9XK9WV1Un6ULD7 -qzhYQ1gJhQLg/rPxg1BLR/Gl3LahlDdaLvDFTEYPz48oVTszC1yQ8A/Do466TuXf -qF5yPBkt/lxasqMa7ZLCUBMZDF/FGL6x0Z9HDcJc9nv1dLFc5vggoEzts8S8Rmue -WwNcwQsuXEuhpRv+Uf3pimoNm7qfPx9vKS4qxHswHCoC4yIxM2VaALUSCq4KicII -3UTpTl6z0FqLEeB20OY2dIyoPhdNoA== +MIIJRAIBADANBgkqhkiG9w0BAQEFAASCCS4wggkqAgEAAoICAQC2iYA4Ezz6ipwT +R68bv41cDKXz2vRDb2h27UovPI0K77tjvp0VfxKDgW/6Jd6UJ+0b3jMo9D1eiKNy +s5pDEEXF9meR9ATTwDNFoo16c38VknUvcmfwSm6cqt0Hxz1/RUwHJ559EoAYClA3 +0sZ0LeLZ99pDb9PavJ5NB9SV848SC8CgIZZR8HiZclQbdWugBv8jXvSvvWrcoWIM +LkB7+89DnWxfDXwVoVjgyz5GeC4+C5LVWpGEouV0ubHU7WF8IvJhA5LnGdQq5WbM +CM0eOJ5ZpCVaGbLDGxqYS88VsP14g59mbTuEDVNXCWFP1myYjGPklIyMqwLC39hg +G1OW1Ipbhvl2bGClI7681VeoGB1p3Fzw1Vr48/ejw0bHOjNfa7K4TE/TdbTIMnQj +6qcSZMHgvPyEbexmYU5cUDS94qBbI5LD5r5tddtNdl2B3fjYrlSGbnGOj3NHRUVo +tFSmVI7y1UW7cVjJUmsE5NEvwurGy9q+eehsMSU8JJvhCLiupxgyoOypsMixZUsq 
+rV5L1Dp5wJyDsxB91XY1MlJ4lG3xU+Dg7Oky8yJBUkiQze1uJJT+qdTrdWVJAdN9 +BWKRbda9/QH1iz1i48MyrzFnp1mk0qWSETt4fLF24eoLe81WSjTK3/ABSbsjp32G +kKVMK8b1r7iXVmRE0u5RYKEaycRg7wIDAQABAoICACgRktW8U1xj5NLOn3+l0q/s +DtmyrH/JCtNgTzKDRiqqaSYCB5VaaYP4e84bVfqLsR627eAFjRsdP1PEXQ5vmgFU +j3OYbx7UR+z3O7svcywXFCYwJOS4UgON9iro73Tqjz/a0I1/7CJa0TUPzYRfNjbG +k2DOQWD4mn8qQt4Pss4xSj1cYhTmhnKYiCHm6pMcNhFbnLafC8AWpOErnfgZVGvx +OIK9AQn2ev4NX0Q0yWHRRJAU63CEGX4/7OtimE2Zlj75e9vC7bHk3WXYYL5Li2b+ +Azz9+yGc53+a1IBcc6dqrSjcvX3FNxAZ/QR7eycZWiwo95lBSL/iRysBlJ29VglW +YScc2Il/xWNp97PORwsJEDpeWq5UYdinARFK6PAGjnxmADZNAeZHP+r1C5CQaw72 +y31aIrhL2s9wRPZ2DytIlkSmvffIoNpZJW2AyVdJn8L37Aot0Hwr0SsU8/zibvZ7 +4d+7/6rnPnE1jTZlpnDgyH5e5Mtn3YUYDlPAEQudfYyvh0QrNfSOMnetWSYTh/Oi +40iQM2vaKDiK91deTR50g90A88eSgxWMGG6WUzqNoE5CwwiNQxHPhrmFi4H1V6y2 +uaF3s0Gx6aF6j+ws1ImbgrkpAbvgTCENoDtmS8MYbZjXgTzwnG4UtIwqdc5bK2B5 +i9mdb5w1v2v6XLUxVvKhAoIBAQDhVgmw/ssCscde91dWLMylm5Wf+Q7Ry32eYSr0 +UXqYUbChHkYNK5HpVY5b6Br7C13FrhboDk0hsz3agOFexcrua1N2huyJ8gGjlAzz +i+286WuwbhsX9rYgqTvAZmQYpklAfWLZH8nlwtt3iafNhgSVaa//A2m4hhZagElT +pctVakSyG3OYaNDTXBDOnZi9xagc3eWmxkS8PWFaYC0DJCw9yf+9ynH6+FRZg75x +t7nnDd/eSxtW9QUALUCheOO+yIp/uJUiIyWR69cfojQ2vNx5t8FzpK6EqHFCujhq +e+kJB81BAc2P59O8oGqw9fvc9pzCQXyFbx7dtl/Xu/JyMEqnAoIBAQDPYH0afED6 +qyvtQ1le6bjLW3jGCaymflgGwc0sm/pm/3XY4WXhrmqeSIF3tbhP2HnANGinW0wP +nFd0vAi8RU9UxB7iIUAZ6wXRS8/YQmv5zIouPzSCpmvW0l0IcnqjDUS0IZPRo+UA +FTzS2KIQ/yOsHSZoVNQe/Tsdk7Z8XVAJlq+YZ7o7pGR25yGPleUUbVwbIhoEiBPq +EFA+4BErok4CFQB9J8jLRdzmTEQFjQ/w4w066ZkplcIy009a4+LXIvL+MCPG3qMD ++2K/HlTYfMd+CyozaF0ZGTECtakrK+PWbbTj+VV30SD9Sckk8ZqIFUq18Fb574gF +K2KSq5SkYSh5AoIBAQDdenJ2HEkvcctzJQsbsVbII58yKFsPi8IBjKHql7c2xXwl +MJtL0JpOzH/rB7yVKXvWk6ECHyRizkkqXeil/STTqHvVkRInF83SmO8N5mgaeRcW +x3Ir4JrsiUoodrtFmxN+pn8kx+DqytZprMxY7rPMo5+PuCwOaQTJmTP5Woj7gELb +CK5ajBNM2z3Nxwrc48yz6soRXOksV+w7JzK21rQBW2zZf4T+V1yYyyvBnALF/lYe +qJXLp3Jt1QykaSz4VSYEGUnDzuXbggHknspRTtopbJpg7ul1jBYeruhKiVXoQVnV +3k7MdeEgkk+rdWtDqMU1Daa1hB3Db8DOS3YmFB8bAoIBAQDPDD476F0UKTzlWf3r +9pzLZNuTlmsrnC+VJ4ALjvwWQ+7MiFapWfQXbrrc47FO/wqoLWtj1JJ/b5Ad+/MY +znajYmCXU61lczLOwcuV1tNph59bBz4NR82ZoVTDr1DkZMX4tyGYCPQF/i5JMYO2 +Rpa+LCiBuFhFTH3uTOHBD4Vu3WUaXE4jaEHqOWBXtMgQehOg/45MgfSoGHuWGy7p +itYp3AAt9T/UPD+OLA0qIaoNzxQRgtOqIlzPVA0B6U89jyZfRX8i+nx16FKyEL2T +nBmtrcYHp6Zz/aPiWa+6a8rB96zIhNOhmko+uaG7YgHw5pk+R+T/C/mZd7SmTetN +p7e5AoIBAQDXqOVl33+eRw3CxNCJZsfglrD/Jz8VuZv5MZwRolEB4IQwm/whXdnT +34y0UUpUQHnVepzAP3QjsLKANPUY2rKO+f8NAX4Uakzn43QLI+hZcxx6hVkV6OkJ +Hi9fwSEBZzx5DWEbxmYMIGlaRL1yVff8wevQci7WA4rrztb9D5B0c49ItCrMkLNs +X6+9Bh4zafL/FxJSkTahQLe+KGNXSGGGrYB9M31oLSKKM955ZTRnICPxuyA2hffx +8lmHZ/5hmP+eMKoAJ9khilX4LmnkdXJEZ2w5lQTPUTNP8ggaXvFWpijjUsaXEdkR +NMnXQHpKE2RaT22UJ6z3W+biQqNlhlVW -----END PRIVATE KEY----- diff --git a/tests/integration/test_ssl_cert_authentication/certs/client1-req.pem b/tests/integration/test_ssl_cert_authentication/certs/client1-req.pem index d60adf1f62d..d5cd522bc8f 100644 --- a/tests/integration/test_ssl_cert_authentication/certs/client1-req.pem +++ b/tests/integration/test_ssl_cert_authentication/certs/client1-req.pem @@ -1,27 +1,27 @@ -----BEGIN CERTIFICATE REQUEST----- MIIEnDCCAoQCAQAwVzELMAkGA1UEBhMCUlUxEzARBgNVBAgMClNvbWUtU3RhdGUx ITAfBgNVBAoMGEludGVybmV0IFdpZGdpdHMgUHR5IEx0ZDEQMA4GA1UEAwwHY2xp -ZW50MTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAMGHbRBS+W5wgB0N -v66ODxB8lU1xCj/4imLZPZcLzPdtL0fR1TS+G8KjGSrxznEph4NbnFR7cE6fKtuC -/OSSUMcBeRiXZxjW1+uYjGp6HGJYuyaeVKMrhP2W7BD9GuMu1l0k6+6KR8gGCW+0 -9ROH8Uy6rvCI+aGl6pgwabKzYWIu04EgIsdCgj9aRpkyxoTwFdriDJngbTutXrPC -C+LaozYJBD4tnwBzSfpMlP+1rEPc/Mpt4beDyLA9vp15uVYQNaJKopvuCoflqLE0 
-4QhcMdubsUjQSikutz5x/chElyWG8w/5kjAKZmdwJLa/yxg3NsCoPdmSj4aVwQwK -0seY5o6yUpdADc0W2BzQ8veuQoSX/rWsf/lWQe2VG7aEn84TSJoUWwgXLIRQuFdy -jE13w0VdprJo6Z07Yuuo3cJ9dnnmh/LOZL84tLC4o4qSsnb5UjjS7HWCIDVtsvz6 -1tKyApJZ1IvsrEshj602whrIDTCZ6jMhpBLpZIj8GRxSRKxpC+Nqu5ZpsovCNWbp -YAkBns7svwEZaRfKY1Sm6bbgcZk1VaMKUIPHqUjndVkKjZ7SeptXCf2Kv18xEaXD -Lerg99IhRzUlNvv/MKrG1Y9ukO2xb7UBvFPzkeiL09MHTR1bZcvowiSO3IlBvCix -juHur5UtsGX4wszpSrhtaCYqZG37AgMBAAGgADANBgkqhkiG9w0BAQsFAAOCAgEA -aCqXR9C9dZPY8ohqtRTPYcmYTURPAMIRnDvfjtUc9896GIJYR696gh3s7oxXo4c5 -85acI5E8LY0zrtq1yOHRSlsmoIp+Tm36a+K1C1H8IZGvwLhUGXogj7eWzGOTTFx2 -OK9m5QiaL3w4p0P2magOlDxA/PCJ1Uqnz5eWbdHT3TBFIgwpRosK7j+/n5eLfgP8 -gaAUhEW1cUxIcFy/nB1TUhrsj60IaDgfgDsKq98c3sDIf6pdzrRuk6m2ur3eVoHp -1gcDn/XhVcF57cvhi0kdBNA65pKfgvHVhFx2YUdb1nlPjkwDrkWTF/HyRxMxs7a6 -g97PvBHvTc8wojnZpRbXdQyaoNjAhJzpcXaJ8qPU8+P8FnFFEsX94nh+u0FTqtZF -DRm8opUwYGrPCznb/u70wlMMgeGFD8BSQ83TfwlEug6J85Kfh0Vp8Z9gD/GNN4sp -RLFChDgU58TmaG+gFAufhUJjDoSwZ2LepwhI585pdePvUNOL+q4hl6dL9pfGKVxu -gwdvM345CJGwbIOhnol6kfakjp3mSqejXGIjnxdzbTKJkGqhwLcL3A06Y37xykRJ -nkHN4ahhLnFEc/k9O1SwcvTTR1Ct06bYGRNbVrjy1RWCsjyCWokSArOdslh3K8K3 -rva3aKss6TWYg2Qjce10pMaluRbIoEkx+0iII9vujoc= +ZW50MTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBALaJgDgTPPqKnBNH +rxu/jVwMpfPa9ENvaHbtSi88jQrvu2O+nRV/EoOBb/ol3pQn7RveMyj0PV6Io3Kz +mkMQRcX2Z5H0BNPAM0WijXpzfxWSdS9yZ/BKbpyq3QfHPX9FTAcnnn0SgBgKUDfS +xnQt4tn32kNv09q8nk0H1JXzjxILwKAhllHweJlyVBt1a6AG/yNe9K+9atyhYgwu +QHv7z0OdbF8NfBWhWODLPkZ4Lj4LktVakYSi5XS5sdTtYXwi8mEDkucZ1CrlZswI +zR44nlmkJVoZssMbGphLzxWw/XiDn2ZtO4QNU1cJYU/WbJiMY+SUjIyrAsLf2GAb +U5bUiluG+XZsYKUjvrzVV6gYHWncXPDVWvjz96PDRsc6M19rsrhMT9N1tMgydCPq +pxJkweC8/IRt7GZhTlxQNL3ioFsjksPmvm112012XYHd+NiuVIZucY6Pc0dFRWi0 +VKZUjvLVRbtxWMlSawTk0S/C6sbL2r556GwxJTwkm+EIuK6nGDKg7KmwyLFlSyqt +XkvUOnnAnIOzEH3VdjUyUniUbfFT4ODs6TLzIkFSSJDN7W4klP6p1Ot1ZUkB030F +YpFt1r39AfWLPWLjwzKvMWenWaTSpZIRO3h8sXbh6gt7zVZKNMrf8AFJuyOnfYaQ +pUwrxvWvuJdWZETS7lFgoRrJxGDvAgMBAAGgADANBgkqhkiG9w0BAQsFAAOCAgEA +L2KVSFlGtuYjmV6sTYF0GlA4V0RvUTbLM7qnd321957ReDR8iRqj9ZnqKDEHeH9g +jfuW+TV/BeQFjTTAXM5Gy+LRz23xOwqcvxKh0WYjGLhDXIjWx1zzzGBaAcyWMllq ++o6LAUadST3yhfncP0A79hPLxOsPgMGd3OXGajKskDNmU3MTTsbtQ065HsBwo07P +leMx2jDkapesUaNaTmXERg6MT86hsknGsbO/tU3dGNy4hBuOX5O6bEkijF6eESLd +U4Xc54yScVvxpWqTSEAz9xHjIOpOZNfW+enbLdpApxq6IZkeVM8z7yy8DNjTJ2SD +aS/xKexqrWjWFxNa/CtKezkaZgmLs9jGGan+hmlNBeuixvJEekPliv6Syj3wvLp/ +L3/PmLgBzZj6iRdw5fky0swCn1qwpgwYRjBSN+SL0E8yG6BGKFWByQfwWbdOu9DS +lN/CPBe73yi8kYY5gBvBmPsrt3VMVRbXBLNM16jO6lkyYzyC48jTdicqpLsHazZn +Z4I6GZoUQKc9WPzSdu6tEXjM6e/2lkT8kaPmrae3JOKnP+lzjZjfplV1NylICNQY +whPWBVGaRg0dy8dZSTGtzygTNMoHS3zYsBGE4MuGZtm/4+x/XLkz32n1k58wAKxJ +JKafNaOReYFxJKd+ML5XnYOVICuw3nxQY+CeVZlz1Bc= -----END CERTIFICATE REQUEST----- diff --git a/tests/integration/test_ssl_cert_authentication/certs/client2-cert.pem b/tests/integration/test_ssl_cert_authentication/certs/client2-cert.pem index 996bcf41d94..9d317f07963 100644 --- a/tests/integration/test_ssl_cert_authentication/certs/client2-cert.pem +++ b/tests/integration/test_ssl_cert_authentication/certs/client2-cert.pem @@ -1,30 +1,30 @@ -----BEGIN CERTIFICATE----- -MIIFMDCCAxgCFGtzLs/dg4kQgBYqaATBtAwv8dB3MA0GCSqGSIb3DQEBCwUAMFIx +MIIFMDCCAxgCFAXxDGdWf+MHldd68lQPasjUzyRuMA0GCSqGSIb3DQEBCwUAMFIx CzAJBgNVBAYTAlJVMRMwEQYDVQQIDApTb21lLVN0YXRlMSEwHwYDVQQKDBhJbnRl -cm5ldCBXaWRnaXRzIFB0eSBMdGQxCzAJBgNVBAMMAmNhMB4XDTIyMDgwODE3MDU0 -OVoXDTMyMDgwNTE3MDU0OVowVzELMAkGA1UEBhMCUlUxEzARBgNVBAgMClNvbWUt +cm5ldCBXaWRnaXRzIFB0eSBMdGQxCzAJBgNVBAMMAmNhMB4XDTI0MDYyNjEwMjUw +NFoXDTM0MDYyNDEwMjUwNFowVzELMAkGA1UEBhMCUlUxEzARBgNVBAgMClNvbWUt 
U3RhdGUxITAfBgNVBAoMGEludGVybmV0IFdpZGdpdHMgUHR5IEx0ZDEQMA4GA1UE -AwwHY2xpZW50MjCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAPpDQjGP -dPUiGXybmTEEbneSxXDaz/+2H4Dl9QLq3NnqozavqqZksseYsvOwLbzdZ92sRLHf -5B1Wwo2h3SbOGtb6CD3NMqV5P/nvHghn3KD60l5Jy81W8aJ+pwra8tVf/O0oDcJ2 -qwHABhMYm7cf7vk3Llt4clQ2g9wj6o4UuCFjGXDqPLxO+xN2Qtx9YZdxUGTrUtww -FgaoJiUqoeuagMtJ2OlmTsRM8VNnddLqEnqTWNtV3hloH709fA4RzudCOhHDwx2p -+zKDwDTnOFgOVaX0I76CZ/eZ3qU4cwIZ5bJaJjowi0XCP0Pk3LYQ+wPrDsIuP1td -xVAlU0rXXePxi0yrwDLzTi8PPogwkMSkfSRf4xlgQRlnQjdya4A6h7VTvh92tpMD -i6EP1JA1nTNebOf5AjKwNHQ6B5XuTRP0PEUIDAfV9mNriR5vnn+oM44AM7FQ+tFw -Jbc8CQX6487M9KGsHmVsf60fWCBmgZicof0XSpVrnMDJdGARzgmxz4z/Eunrr9uu -p0SttZdWns1lLwWpbnrCk7I3F4SZ8On3Yf+RxFLZvBJjvHKfRopi81YkralkKfu+ -Se6TE0QpkPEEaXW5zqvVkt1gW9j79zeBZRFzjuT35F5m7fWi6e7V/W2crtw4lGxH -/LYX397ZP7i7cT6N+g4JYkeLgMy18S1jiZtHAgMBAAEwDQYJKoZIhvcNAQELBQAD -ggIBAJjXO5KD3JrWkcfdvYgdXVRLQDVecwnvUDAP1R2Cw7+iZRfmWccESKTly1HM -+71ThCt7wSFqSxkE/nl6/4cKgNGHG1Zw0Iy4RCMmA5vxiNzgSmUg/3jyl+smQZzf -8e1iDAezlsEJrohCcUTKocv5fl9qWKspZ7Kc6XKQ1q7YbUyh1ZZpzh3mHZ6XhSo1 -EGMXrlcKUst/hkKGiuONOP3qRjsb+lMRZ1IpIB1uIT/NddnQw08Ah25nVSRtc0z5 -b5edvvzspLZ25brsHBzKtHHg46FwMTi+UfMgQoEsV3DXNB/sWT5V+60AYrPA2mss -5MNhGSQlteinYD8f0LxO2Ocoxl5vArzlgDjx+BY1H2etI0xv3u5U09FXqvnbNQj2 -5kbjJXI+wBXxi+CSy029fBBaU4OpjT3TM8VTFFBL1MEe38ZkAX47HcWOenN+xHsq -dFDZ1so5ZYRbPgPhytEE0CWL8fNatjtPCTQTOrZVZW5uKJfUJog6gYbe/YxSKfqx -QtDf10xpSB8L9ooCyyBIx55YctpEtCuj48HXe0vMBBQvFTx+C9XikqTqGjc2kIdU -GNK2uImXLUas09UTXzm4rGTgf+hM2ixHd8/7K0TATh5eMlK0td/unXf+/yWmBEcz -FaZyYeygPcv4U1NXAFdcY//5qZuIF38H4HbQ2QISH5G6LPff +AwwHY2xpZW50MjCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBANUgBLgZ +EjiW3LmjGlCu17h2ERYc75YX+7esbw/iMWMrcrXfvNNutJ4H7hOsV81ZpMEQouHR +fog8InraKCuwmb28DLhJZJlpE49hZxeUJws4yN8VCCTgr/tLtTsvsSS+TGA58B29 +HHaglJwY0w2mOlOVcUxOkjne4VHokLOomhzzqcqTLCjwUslZqRn+SDgXyRw9P5re +J/m6E36dFyBeVglZvealVp4uK/TTqVVFJYBljD22M9wrOeo9AIvrru5VlgDNHu+r +wqgSeE/bGcwhX2J9J++lwOxsFDR7OILY33yoD7/6NN61SJEoRh7xZTQkr/Dc3Hpv +jYyj4YagdWKq2xDZzIxH+QfEMQssuFvulq5L76TJpXB3ceyCu2NEvb/563p1EvXQ +bp5Zjz7Ojo1elobs9epvfbCmiANmTxG8GLKteLXfjcph492gdIw0nsV9/bIzE5C7 +lnff4nEU9E/uEJz0FTw61VjcZPtpqEzLE/8abBU48pTj5HqKo8Nsjx9hPl6trO4h +81yMaqwbQDmza1KsU+CPIFiycyv8Hn4w6JEjwnUc08rOoQ3e7HjqLNpn8X6RirVQ +UrwSU7L8RTKeOCOBLg6AMXfH/frPRnNQjUG/Z7tBTjTJhm38qucxyHI3J5jwX6vn +/jBfdFHfT6510V0Q9fzgzp3H3fyHpnLW1qxDAgMBAAEwDQYJKoZIhvcNAQELBQAD +ggIBAF9fs1tF/yL+eBTf7F/RMdDrb1q9/YZCZ6btJH6CnxCuj4M3o4EkTW2PPSY5 +AeTX0zYaNXvlHT54vjdO+9H3ocyY0HfjVSzttw7qiTkvsssRLLW0PMpc8QMRBpz4 +CmD8vfjY63hKzE2cF5GyP1RveCuFVf7//wM2dfPwrQkIOtKrctejYjn1tOAfgJtX +It+RWvJ8T9t4e3KxYgKSa6eyYxyNMZV67X91C3jIJLgTTLwXXGQF5G8hH3KsclSl +RDE3CAYoyDTtaMlI6A3qDtmvfFKzeltKZc8w7uIbjgHvF49p+n4oh1WwDc/C8SUy +1QAx6DSSW1f470Egtfp0hJKT9yJh7C+/EdeAq8Oh1vMxYKBrtjswCsrFQ+bayEcl +2SzMLez2S/bIFSF0WaDqqIOZDzcjpXjbFlm/px01qoPDk5lkTPGA18Zq8mVc0y2N +R3vYzvfpigjkjXgMcOIfP1Jnlrx1x/4+txR723hUkHQd38nKENepsoEoLrcpmbIl +VAKYTALTle6jJKGf6oZf1TIs09Bc1Qs8Oo4IymubOXD+FlUSmggVwMiST15O5vQu +zdvidRHhAE581DKK04GLmWn0UE0Ko4uaNHAgl2gzZsuJQ5oZynOxmh/z6t+mgA7L +l2qS1WOq29Cq2qWrrfvqbl21LWLrf2X75UyTd3GAlQ19aqLV -----END CERTIFICATE----- diff --git a/tests/integration/test_ssl_cert_authentication/certs/client2-key.pem b/tests/integration/test_ssl_cert_authentication/certs/client2-key.pem index 76f56dd68f0..ed0d179712c 100644 --- a/tests/integration/test_ssl_cert_authentication/certs/client2-key.pem +++ b/tests/integration/test_ssl_cert_authentication/certs/client2-key.pem @@ -1,52 +1,52 @@ -----BEGIN PRIVATE KEY----- -MIIJQgIBADANBgkqhkiG9w0BAQEFAASCCSwwggkoAgEAAoICAQD6Q0Ixj3T1Ihl8 
-m5kxBG53ksVw2s//th+A5fUC6tzZ6qM2r6qmZLLHmLLzsC283WfdrESx3+QdVsKN -od0mzhrW+gg9zTKleT/57x4IZ9yg+tJeScvNVvGifqcK2vLVX/ztKA3CdqsBwAYT -GJu3H+75Ny5beHJUNoPcI+qOFLghYxlw6jy8TvsTdkLcfWGXcVBk61LcMBYGqCYl -KqHrmoDLSdjpZk7ETPFTZ3XS6hJ6k1jbVd4ZaB+9PXwOEc7nQjoRw8Mdqfsyg8A0 -5zhYDlWl9CO+gmf3md6lOHMCGeWyWiY6MItFwj9D5Ny2EPsD6w7CLj9bXcVQJVNK -113j8YtMq8Ay804vDz6IMJDEpH0kX+MZYEEZZ0I3cmuAOoe1U74fdraTA4uhD9SQ -NZ0zXmzn+QIysDR0OgeV7k0T9DxFCAwH1fZja4keb55/qDOOADOxUPrRcCW3PAkF -+uPOzPShrB5lbH+tH1ggZoGYnKH9F0qVa5zAyXRgEc4Jsc+M/xLp66/brqdErbWX -Vp7NZS8FqW56wpOyNxeEmfDp92H/kcRS2bwSY7xyn0aKYvNWJK2pZCn7vknukxNE -KZDxBGl1uc6r1ZLdYFvY+/c3gWURc47k9+ReZu31ounu1f1tnK7cOJRsR/y2F9/e -2T+4u3E+jfoOCWJHi4DMtfEtY4mbRwIDAQABAoICAEnNkfTJqKUt9DQyMuAovWcX -6pAYh2SS0gGST0oX9x9wucdD0OCXK73/Ay8oUqSy6pGN3whRzZT1ZBSemnh6KaIi -RFHtdLUl578OTF4QOxliPq29t5OLw2C3Vw21eg2A7DcIK96gXlf6yA/TnwBHM5Nd -OZeSEq1RElvWX7Kc25xE/Fi3S0gBRrv7kUYy15fwu9O3Lk1vBN/bsLwfMXcorjjF -Q2m5WRs80aJWDYIws1hgocC1NvVpJosJWc9QYyMB/dwFTkIuQb0o64LueUhGme2B -7RSeooh7YRR6bDg/aOTK/yED9d6RpVShZpFuuofyRvzu5JJOOgUjrMeSccbqEbjq -VNjkdxoNl917TBezIR/KioXS/gauL4VF9coZostGohDnLIdJrVXwisAZFU9676pn -hFGaZ+vVl6oiWAFwwmbkSo423cIbc+dxMfHtYElbEfGm4LOLrybS3K3sg0gn8HZq -p/AISM2l/282peWjHfa1SXedvvDF1WFJRaB0GuzodkhsqEXXSyAQlc4veNiacNt4 -UQCYiOatcKzvBofgICyX/lTQN/TYdfo57tjCZxnOrDHwpcHH6xRcpuRnDphTV3KD -g2JgUKbvBcZgY03nS7JonzhuOMNd+AUT9Oof1VSS9KKRYLu/eJxl8/6dR7Mcmv5W -fCzEqNVvaigKkeVQEq9hAoIBAQD/73EFWGS+DfKuivyxGAdxAf37eyFRIQYL5Xz5 -xB1h+GjtfdxrigNM0hLHQ+B0209ZwZzl/O37OvW8x8V/Sj/YrY/BYQIGu5quD6lL -NbuEnhLMlU/yANukumNMDbDKo4Fgx+QY+y23zEIFNzraoKH61JH2VrWQ9qfrupaW -qVdUE/LtdZ6/Ar2pKnysLYsUpgZX9mashKg6fNfJWyLqfj3frVD7RM5NTNgwD5as -vnSAkvMLJV1997+57W68E3ALnGxf8UbTxAX8iQ6yig6Oo3bA5er6qt5wbH5m12ml -0UnPvhGNO1eRnX85rkH4okGJCG6ln+wPXbkc2igVH4Tlg/HxAoIBAQD6U3M4JkNE -kVRqke8di5D/HUpsIG2t21DTgihLrg6iNsjdaznvgzRGRAIQCLP14qRoibMsLN8k -zfr+I4QlE2dyIn++fJkaOq/xRvODSDNRRg5M1ttprvAK0QCvbgKgkYxxqpRzOuD9 -Aowpq5YGhHbe8g+aAoYGU9kYbx0XhzU2ZDCqbQP41e3f5OnFrNm8YCzazsJ1fNEU -H7ktT+vCP9g2DtZWF0zFdDeHo4f+h7DcLGaEkvAPpRWQ0qL5tZ7vPZL2GXO9gz6o -aeD5fiMZVUV/SOr51MWIcSIXa+oftxhkZxGqjJrm7Q0cJu3ic7yFXa+9nVaQd8r5 -oWRIvFxRCii3AoIBAQCgdf3VTRs0cUhDGI7fdWJM/uO/RO6Zj+vbj5Ewgz2sy/L1 -W2DT5560Dk2Js0vJ2CpC6vm01ERcrBWfu1xety2aaw7jPi/oCr0q/lS0+8ZKmlTN -AZwy7UjIWbeh2j+KfMLD6+9dnjWGs8B8xqjWo7mKqrWszaZecdbMG5sIcLl41F0N -dh4GQfCpXCL4TEGTu5fklG7BIRbcPKOJ8eLNREMEtwQ71WLG4jP0xgFA4tPmPLu2 -tEGOlcMWDf/MWR95mCP2by5p3M+oCkI1vArucRwmSsBtUq3NrKf/UHVDahkt70vP -0XHQsP8wKcng2dHe2XON0dtNswQ5S6mvvKg0wenxAoIBADmuEuWEQ4nAJwBrYfqf -1yhBmo1xp3QhDe2+lwRhNfQAxaXneDENPXVXZFZHexgUQifoWsW6DSzQ3Z3Dl0Zn -wzVUZ7T/xtN5ZGMnIyND9rcqek8QTvx6F8uWPx8tLSPMhd0HHi+zCHakKKHNbMNR -sIZMpnWpUTKSDXsI1149hHBlA6WxevHScX5eo9Mtsfoq+wrsC3jA6vhFGwkq2jsn -NO2324F42vLAha8Walam84S4ImM37GLeeiqlrnH1fIrJ2FwmnzmyzTRHOVSj0Pic -Ymgi6Cnq8h0vXFijQA5QxSkVaseuoF7HjnbHLZc5bd4ZKU56u6CMSdPdcVslM+xk -d98CggEAYleUQS65Nk2EWj6nLXD8HuVDWbKLXgP2Q4VpORDHpT4isboSTtR6mMwR -huncQX5lvUZqZNRvKIS/IfW/2CiAF+3IhUxx8r+AUyrc7OClp1keH26c52/RYoJM -sl4DfQE/DooWvieQTx4rILiEYrPWwQLYfuDs+dSgnaugr49s431PeVyxlH/849aI -dHnwInBd7sdbN11e6xErry4LU95imkRhg+y+QdiOE2N1Q6I3G2Jn+n4NhIWJfjlS -Dysosye3R96FsFQohVMosVLTL9mCT8J6LFIK8CjRcvhSQmaTmXjTuisIUntBA9zl -PK2zQzz0JkpsDD7mGM+S6apZHiLgHg== +MIIJRAIBADANBgkqhkiG9w0BAQEFAASCCS4wggkqAgEAAoICAQDVIAS4GRI4lty5 +oxpQrte4dhEWHO+WF/u3rG8P4jFjK3K137zTbrSeB+4TrFfNWaTBEKLh0X6IPCJ6 +2igrsJm9vAy4SWSZaROPYWcXlCcLOMjfFQgk4K/7S7U7L7EkvkxgOfAdvRx2oJSc +GNMNpjpTlXFMTpI53uFR6JCzqJoc86nKkywo8FLJWakZ/kg4F8kcPT+a3if5uhN+ +nRcgXlYJWb3mpVaeLiv006lVRSWAZYw9tjPcKznqPQCL667uVZYAzR7vq8KoEnhP 
+2xnMIV9ifSfvpcDsbBQ0eziC2N98qA+/+jTetUiRKEYe8WU0JK/w3Nx6b42Mo+GG +oHViqtsQ2cyMR/kHxDELLLhb7pauS++kyaVwd3HsgrtjRL2/+et6dRL10G6eWY8+ +zo6NXpaG7PXqb32wpogDZk8RvBiyrXi1343KYePdoHSMNJ7Fff2yMxOQu5Z33+Jx +FPRP7hCc9BU8OtVY3GT7aahMyxP/GmwVOPKU4+R6iqPDbI8fYT5erazuIfNcjGqs +G0A5s2tSrFPgjyBYsnMr/B5+MOiRI8J1HNPKzqEN3ux46izaZ/F+kYq1UFK8ElOy +/EUynjgjgS4OgDF3x/36z0ZzUI1Bv2e7QU40yYZt/KrnMchyNyeY8F+r5/4wX3RR +30+uddFdEPX84M6dx938h6Zy1tasQwIDAQABAoICAQDIuNYY+OvTRipt37IaCQF8 +Zh4jgG8ZIk9dJlaXVAYFi1cG+chiLSKIr5lHCArNiT8E4gE1wtNzxYcHw00QEMxL +CL/GFMFdRrw4TpkEePDovbtZdvprmP3FJAF00673lw5hlk+SApi7FPPBrBOiCEto +ixfgsSNAw6vcM7eMrR8wY0AnXMK7b9PYdMwxge5MfgJXyUuNNOvbY6eWmKa+Qnqv +ZcjXYCKa6YtWkr4pY+005u7U9DQViNSLypYoMXlYWFzlNkqLmW3EU1jihMzgFxI5 +tPwW1TpEsGm7H84SVeTuB26F9UUz9vJ4W8DmxZz2JhNaOvifi056BaKS466KlbWo +iZgt57ajj0VmYxB0ZL7QgQqb2xDZL12kU1AU09QAXJnpy/RqvV2HloKbqrOd5h4L +oME6j8vT6Q8o1vsh2zJuLXHAsMr30XK8x1HhLDDzln49gq/d3GrZkNrPDjcumiwI +o6PYR91Q4QI11kdqR/3005wV50g847uURFNF6J4ziDeDGsHqj2phmOIt6d8vWcFo +XBEovCZkXQUSx+4NgAAy1GRBjK6tLRRQnS9bGgkELS8+Jx84NlgKkH3m6+lKNJQ1 +o5SpUqmk1dYnpTv99U2+5qvA/o9SVy56wlfuo+u0GlbMjs3OmItXErg46UBPhd4d +HipFZrBItpw0DYAF+voLQQKCAQEA9ePjYyy53VGLq2+vRx02IZOMQOLaaBDfybtP +51ksHfCPg+XZ4jWsDH4EPn5DeUmjZ2nG8GYSuv8dAQ4v9kArToLDyy/qhXHzaING +uSd6KlTGrVrPK1Dyu2p69xYrnduD6Vm06sJ4olDq792rEj4/hdzVwrtgw+d1ZLXG +3ropWgrHQT8z7+B9CAIAOXhYlKrV7+UdbAod+j8OpCIHk5X3+NkT4Ht7biqzQvbo +pJJILFA1qHi230N9YR8ng3PHQYObYJ6NFBrxhpXIfXwbuPyrEApY3zaL3HbkYC52 +aAI3zy7WOqZSqRZ6aDzXdf2EMGusNSxj9/TAZhTAiJvwHdwBowKCAQEA3eNC/iMt +kmy4R3FQgti0Zq+CBUErMn46pQhBCcQreI/a5U4UT/iY5WGutKXp45d/BM2ztyQL +T/8p+85RkasVF/rJB2PwlzUZKAAq29nGXuV0I6N6EiMYa2LfFLzrrleNamPQ9Ubn +atp0kiyLiPZ6P0+Y5wZMirHlMyup+fzG6xsS7KVy0Z5Fy4YetP63r6xCVQ+Rdu3l +dvXqGb2Bdc9g4OxES1Zj7MKHg0b3ce2lYaL0cq0z3kJ52MAVbL9tQQOstJX41VYv +/QSVIjC5VACSa2qsqzquqxYOyT1U0l/8innHfD/uY/8907/q/JqoO1hU5LtvZ7OO +ZF/e/ycZCM2U4QKCAQAXUIJQ9v6wk3jQyoguEAD/8gOMa3YWA/OUJySOZRAfzp1s +/jBImJo1nQU9/67aIzdRKOBqDuObw3C2lufJS5BPo2p5K5PrD0DrGfdsuueEeAFW +kpOuIcDCMHh0US/ViejaCV10HPhfO5jrIXOFCU3wnV3PVwD30kx5PhsbJz+ggAEg +mKOODRUN21K2IEkV35TlaC3//n2VKsFyop9hSQj4GW0fDdZIPdg9czff0tbxDLHp +xXhhdv6+ZLvUZPfxqE7lPGNYEq3v+ufFrizav2pg3PpMP9nHD6bbz8v+VKeCB4jc +isSvr6fvlkU/tMgB51OuvwTDj/tmMnWG/nIoAqJNAoIBAQDWiLYsS8zzJwUhplDm +winiosz+0Zy3jE6dZBamH7K8NbK6RLzk+YKzPbgSV9yFPeQEu/KIH2SEqzxnh3tc +cWLKtaKK77keKaux/j9yI+RlukqJbrVHNgGVSppQTb096s8DT5Eopa54pNFSx5j+ +Cvn1nrtCm9eDvi7SQ+RrnVii1qF8hxc1z2bCOmIUM7dcNhxIa+4EZE2ZsHjw/EZg +puqPbkE16khhEVC+v+3djJ17gngBLK/atMFkrYvJgmhbFPd1/w8BDf0GENk0npGB +w6/OBez+/ZUGPCR9tDv/z+i35rjWzGVs78tSodvM8qe4AVbLdOJpDLWfHQbaAm51 +EXhhAoIBAQDmZVXAS4dTDAp/cGwPoXOyFxu+UNnGnAKO0S3aJW9qV6E5N7jsLqzI +4eD2Mk6chkBrO4Upmwgx4sLVnDMlHQGvoqpWUZES9k6oIgNZ6N7KXnvFm5GI7mlR +ySA2LftCeSb4BzQmwyX5wcVjOzfB6bkSgEkvuMFrRStSL6+79XZCoh54jBm+fW6g +up6oXa0+lJbyO4Qrx+oWoe2G9nrUJzsjV1Gj1njnxDECCMrmB+X5P4D02Ac2FgxP +rN+bxs0TpvO4fXsvBN4B+/dtF2Hjgo3rQm5FQ/NmpoO5lAs1VZPjVUiFCjhm3Fyk +Xe2nzT23gDTuPny4yivLAMHPZPfGLLg4 -----END PRIVATE KEY----- diff --git a/tests/integration/test_ssl_cert_authentication/certs/client2-req.pem b/tests/integration/test_ssl_cert_authentication/certs/client2-req.pem index 89cde777a45..f36eb94205b 100644 --- a/tests/integration/test_ssl_cert_authentication/certs/client2-req.pem +++ b/tests/integration/test_ssl_cert_authentication/certs/client2-req.pem @@ -1,27 +1,27 @@ -----BEGIN CERTIFICATE REQUEST----- MIIEnDCCAoQCAQAwVzELMAkGA1UEBhMCUlUxEzARBgNVBAgMClNvbWUtU3RhdGUx ITAfBgNVBAoMGEludGVybmV0IFdpZGdpdHMgUHR5IEx0ZDEQMA4GA1UEAwwHY2xp -ZW50MjCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAPpDQjGPdPUiGXyb 
-mTEEbneSxXDaz/+2H4Dl9QLq3NnqozavqqZksseYsvOwLbzdZ92sRLHf5B1Wwo2h -3SbOGtb6CD3NMqV5P/nvHghn3KD60l5Jy81W8aJ+pwra8tVf/O0oDcJ2qwHABhMY -m7cf7vk3Llt4clQ2g9wj6o4UuCFjGXDqPLxO+xN2Qtx9YZdxUGTrUtwwFgaoJiUq -oeuagMtJ2OlmTsRM8VNnddLqEnqTWNtV3hloH709fA4RzudCOhHDwx2p+zKDwDTn -OFgOVaX0I76CZ/eZ3qU4cwIZ5bJaJjowi0XCP0Pk3LYQ+wPrDsIuP1tdxVAlU0rX -XePxi0yrwDLzTi8PPogwkMSkfSRf4xlgQRlnQjdya4A6h7VTvh92tpMDi6EP1JA1 -nTNebOf5AjKwNHQ6B5XuTRP0PEUIDAfV9mNriR5vnn+oM44AM7FQ+tFwJbc8CQX6 -487M9KGsHmVsf60fWCBmgZicof0XSpVrnMDJdGARzgmxz4z/Eunrr9uup0SttZdW -ns1lLwWpbnrCk7I3F4SZ8On3Yf+RxFLZvBJjvHKfRopi81YkralkKfu+Se6TE0Qp -kPEEaXW5zqvVkt1gW9j79zeBZRFzjuT35F5m7fWi6e7V/W2crtw4lGxH/LYX397Z -P7i7cT6N+g4JYkeLgMy18S1jiZtHAgMBAAGgADANBgkqhkiG9w0BAQsFAAOCAgEA -h9/gIGo83p/F0NDDWGr4P9LxdrE+kkaZF2BxRv2rkty/OC1Qe2lcJPWaEOQY07an -witmxGPaZB6e764J9z/C2l8hoh9LFAIZ+gPBzMEqPBO2/5WYWjjPzzUlkTBczIdo -mCZywvYMrdPQ/F2LnZd8iQO0pmFUGC92vV9Hb/i3y7RthnJWRYv+2F/D5ZWE07sz -xA0gwmnY8d8/G+O2CwgGsHLl2tByfnePDqq+ogwRKXtsPeJwWloFvnPRHE0OmkHf -n2XHbbLMS/m8Wi5utN/LpMV+WITQHMxGPGXcX6XOWVFQuesfI7DKoqZzk/aB2IFO -tu2deFdfnrj3Md8PRAQBe0Ufig0gl6EvutdmiNCeiDloPwV4gLvH7SLQmLheOPP2 -CQZh1skRxhPmnJYD4rrsMGv86dlhEvtnxCO+cUrxnUAAr81BAX/Fo1Img0rPaanD -N4/FG7LnU/Rk8g9roN/8v8s62CnyxcQ4UAvSWAaRrudpErDk+L24Ib4UCtcYiGSB -Dj2tK5SMfcXn+bR2HTdVIKHWHIK1X6bS7Jn9ZXlDg/MCyCILeOmW523FoLfTvNVH -IY9MgNe5KDX7dpPCAJFWwmidekNz+sSbpu6Br8IgWd6SuTEx8Lmb9GB0V7P2CHlE -1ASW5YJMgVPJLQ9LQhRIim2+pL3Pz/SM+oijeSyEoSw= +ZW50MjCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBANUgBLgZEjiW3Lmj +GlCu17h2ERYc75YX+7esbw/iMWMrcrXfvNNutJ4H7hOsV81ZpMEQouHRfog8Inra +KCuwmb28DLhJZJlpE49hZxeUJws4yN8VCCTgr/tLtTsvsSS+TGA58B29HHaglJwY +0w2mOlOVcUxOkjne4VHokLOomhzzqcqTLCjwUslZqRn+SDgXyRw9P5reJ/m6E36d +FyBeVglZvealVp4uK/TTqVVFJYBljD22M9wrOeo9AIvrru5VlgDNHu+rwqgSeE/b +GcwhX2J9J++lwOxsFDR7OILY33yoD7/6NN61SJEoRh7xZTQkr/Dc3HpvjYyj4Yag +dWKq2xDZzIxH+QfEMQssuFvulq5L76TJpXB3ceyCu2NEvb/563p1EvXQbp5Zjz7O +jo1elobs9epvfbCmiANmTxG8GLKteLXfjcph492gdIw0nsV9/bIzE5C7lnff4nEU +9E/uEJz0FTw61VjcZPtpqEzLE/8abBU48pTj5HqKo8Nsjx9hPl6trO4h81yMaqwb +QDmza1KsU+CPIFiycyv8Hn4w6JEjwnUc08rOoQ3e7HjqLNpn8X6RirVQUrwSU7L8 +RTKeOCOBLg6AMXfH/frPRnNQjUG/Z7tBTjTJhm38qucxyHI3J5jwX6vn/jBfdFHf +T6510V0Q9fzgzp3H3fyHpnLW1qxDAgMBAAGgADANBgkqhkiG9w0BAQsFAAOCAgEA +dr0LKtpOa+Xu9PKwnlsM48/ltph4q9+tsu4CeC8XGoLFNbVIALuZZsKZDehTf+/d +bgEtjW8vVnBGAvVodo1MgCHnhPPensDLfyggAULT2X400cly+suGbKeu3kIOlKCs +TQsFdNKOPm17NcpuM1wTik2UT2EWLdzZ25Wy3Coid+ILrf5YZ75djqtxZlYbRiw4 +4IndIjN0bYsn8l6Z8Pt5HdJ1nQnbDZhQrx6FXWZ3eSSmpklfl4O07z0KlXi1Nmaf +OiVcOMvZUnM8pYmNvul8Jus/XmP8x3jSbYzJDNOJ3YV8+OD8DVG3pLM8U1FmjCZ7 +KiR5DNSxZFpHGXhUqDpTrhLgoqGK9chOqPdzU7Mp4taEO9FV8Goc7BCeOKB3Znxb +XDIszs0oBIHO/tsqUwEcWBI0vjyC2pBYQAYK++qwwmvbfWg5lrb7eH1ZO42DU9QD +AVR/5luxImAA11AmSsGf8i+FJ3F63PzSr0uUG7BnTLC03xna7dPdKXS/pGojNVBT +Q5A5J0rB3+4L2mZLE3mjst3t1xHfLW/0RVRqGwz0QUIloZkO6wPN6Jz6l5Q+TgCY +uEks1YN/qlwjHwI3ycT+Hr/sY5igT0OAySo7qa7lN13qTiO2z7eAMDgafNnq34kJ +4OQDCE28Bni0fFRIaqVCqTU31Kei5jbORif2wK81Zmw= -----END CERTIFICATE REQUEST----- diff --git a/tests/integration/test_ssl_cert_authentication/certs/client3-cert.pem b/tests/integration/test_ssl_cert_authentication/certs/client3-cert.pem index 848ecd9492d..376c85ab8f7 100644 --- a/tests/integration/test_ssl_cert_authentication/certs/client3-cert.pem +++ b/tests/integration/test_ssl_cert_authentication/certs/client3-cert.pem @@ -1,30 +1,30 @@ -----BEGIN CERTIFICATE----- -MIIFMDCCAxgCFDtXgZV+Jd7/OrySQd+e1dVblQe/MA0GCSqGSIb3DQEBCwUAMFIx +MIIFMDCCAxgCFAXxDGdWf+MHldd68lQPasjUzyRvMA0GCSqGSIb3DQEBCwUAMFIx CzAJBgNVBAYTAlJVMRMwEQYDVQQIDApTb21lLVN0YXRlMSEwHwYDVQQKDBhJbnRl 
-cm5ldCBXaWRnaXRzIFB0eSBMdGQxCzAJBgNVBAMMAmNhMB4XDTIyMDgwODE3MDU0 -OVoXDTMyMDgwNTE3MDU0OVowVzELMAkGA1UEBhMCUlUxEzARBgNVBAgMClNvbWUt +cm5ldCBXaWRnaXRzIFB0eSBMdGQxCzAJBgNVBAMMAmNhMB4XDTI0MDYyNjEwMjUw +NFoXDTM0MDYyNDEwMjUwNFowVzELMAkGA1UEBhMCUlUxEzARBgNVBAgMClNvbWUt U3RhdGUxITAfBgNVBAoMGEludGVybmV0IFdpZGdpdHMgUHR5IEx0ZDEQMA4GA1UE -AwwHY2xpZW50MzCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAKHIdR/Q -waHkIn5z3cc+HNtMfHcKgVGzVDTobggUpWSwVUwa0DMq3OTcNrEnT6zZyUrrv1n1 -6aykGPjRzJ+SZX1ohu8X4EHssBOGaNXUH/AYyfOKMLMGN9AU7dQzNKjuJGkbBXsf -CtmQbQ+/ThMhE4X2bNxEULLudGEfKKQ09LZaqVjhhdVCbuOhx4SPMDJB58Ko10HQ -t7Mh1B3nUTJXFherTr5bcsazQhC6e5i5ySlBnJDnsa9+DMzopiUqMrqqb84WuMKs -zzqjlZXDxTRkAX7nGEU2wjGCx+moPaGLGZs2f1VjBOR7HoNGMAEhmHQhWsIgBCsZ -nDHgAc327Gz1xqsjVd/HrrywJyZfO7ZhxMdmF6MH7eRQjfZGe0+Oajc/7EtFWXg7 -fWdafJ38HGoO8hVjlthKfAFxM5NWjvS7I06IQRwMGGApP5bx3uFmUUixtl/FLa6t -jRKfzaXbX8b0p8HUMbeyvQAZemw+vA+nuKir3DtNIrpqfeXraCMUiEpI8fCRm29S -BvfEsDXCZxBje+nma8g27po8vCaHST+8sjwnNeiW4w6NpQbqqmnvzpf2ivm1U2su -2H1E0EA58zrUoKD13BQzFjccgwodlyutUfk0xYQLrRMOqggtMhsjFDIuNegnPgTH -t7DSyAAg9H0QBXlrd9Ic/OiFMLsb3bu6eeu/AgMBAAEwDQYJKoZIhvcNAQELBQAD -ggIBAHeimTo5afyFhpaH30D9j3EXXExt482nSCPZQbYm+taPVxEiJ4vAs9pa032S -LnA2CC4D74K2Ykd+B/mDGgT5lVpnWuP9VL3wpRErRy6TgkYAJwsEnRLGltNhbuT1 -lup3J4dFgR3tOgwxohjY9FlauZBA5Wu1neZDxXK9UTeAmP0HOb8iXh/goXEvmPLA -HAVHmCrSD0lgEpgB6mg72fb0AkPQq1wlzVBbVtaVgByQP561WmGW6eHO7sqwcO/a -/0Fhd299ChMdnzbHToRt6VFET+oEiCOwF+yEQBRWbjPjCjG+6nYHJh6FxE2ABtEr -Ebr3/7//Q6C8uD32swxXjZaCPEtBC0NNoDW5yi2D7xNHyc+4XHJnRo/v2rPry1RI -Bbwepp2aaCrs38uxut/qXka2xRTyDCimDezJFPxTigJoJ9CgxGTQeJe0R0d5uzlJ -FBtIdyJf6HDKzxNJqB0+wJTYiIiSl0VFPtBYJynMXA82SJuyvCMVgqj+uK4xBr51 -APqdWJR6nBoHaFURD105KiQRM9EVHrbnE38xn4DRN3STeKUlEP94zb3fo3UexJVE -+MWWqNJRdMtUE9j1LRX/P1So4c7BeFp0op0CxJrpXlRmRcWV5lBYhK+WtT8oiZHf -SVSJ8Chol77vm1gVVbJVHIrrH3cfWefv/2Y5fpwuQg6yk/u6 +AwwHY2xpZW50MzCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAPrdk2YZ +HRhd4RPfa89z2Ay/TOby545N5n9R+kpQvyhnCbr41afzIU5DrBX7y8cKcvo7y9Dk +Cdd17Xqn4oYliSvVNI8B2nwkBz87BUYT8pNVD+QUc3Jf2P4Wj6XQM4pd9Ntaw7rO +yIf2Oo3Tq58SSjbXYrqAbCYuX4cs/VWLWyq9PapBwxEPeQ756GZS3Xvq1IfCNIKv +NLYRgExctHzkUWPf6WAS0lKydBcCobYvSMfEzPkbBlIhuDRdMmzuji4FefODY4lI +zvnaD7+IIiRC4+IY9xNhdH6s0UobpSIqLCSxOJNYwqhUQt6gNAO1mdJhUirV/XIl +xG5nCGbQS77yeoBLIBEL1t7tpo3/AdEzkR+/wS11dSpcllSj+7BJaKBhTKVZrX1i +gMqUSbiTF1Et9PnAkS1jtUy1w3Ja7FyPWfM8nt/K6vfNRudg/xwY0iY1RFdCXuMw +kPZSr4W+QryGaPqm0RlpCpLHZBOxBDpf0ZcA37ullh5hjXsn5CRw/0ZYpanqrkrq +2cVZLnY98IrJI1QfQhHlDqUP7prR4Omk8C7edXBqQqE/0mqL7AEhppOyLedLFC7W +wUBepmc1bNH+Ho11CZeSdTZfIgwAcD3v6MiMA5kMTRcW6HAHNS309zNJeDf3Eesz +TBXOSCqNBBbk+oW8bxkTLRdHRgdlLT7N6qzLAgMBAAEwDQYJKoZIhvcNAQELBQAD +ggIBAADJZ+I3CJs6E9U2RjIzi1lMo2sYgdcKJS5+yWW8CNjB+DibKfkWjgWvq2K0 +i3hT0Uc6y+ter4OOeIkGtofOiUPekaZsQkPpi73sabwhDVnlki9QL9Ayrd1qDX82 +fMM5roL7w/a+YdKzTQE9hiwPoQhrpj/2mhu7LeYhidSqwzH1anU5YtTKHq3ZrdGN +imhnklcmbqfcNQU0K2l2bu5vuJXFs/v5FCp72ux2p6QDPWwMbwvr413wibt8o7ZT +bBGsQ1MtfJynRVwLGLosn+2t3NPJTfjd4dMEsZhkDY0EX4vbE1/X+K09EN7jPOHe +aJ2AOt3cO3A2EHCR3Dbmt055C6Lb/YR6s05dX4lBT8zY0knsWSL3R77kQoa3+7oR +hU46ydU6K/Kt67nO938WBvFgI81IatRVKVRsXfTIP2oEa0TkwzuvS7nzj3czNU8o +EOa9ixawVYRlEkcuE4KE7x3TcLEGa1gYJDGbsXAfJct1Hur1SJ/rTDwZvlc+qp3o +wWOLtN0mVHEH1OaGlWmeeTuRG16CuTcku2DYiqeuRNy5eZddSuMOag/DKnIN5ZqV +s1GNrpnxPCxd/KFKtdGl+l++Bc9dBmkd+r1dJ/kRGvhul77Zm2xEnGdyybIs64iQ +gvXq8d8ohbZOPxswiFo3p8fbBjWjv0qm3UnlU3P4B3RDMrrC -----END CERTIFICATE----- diff --git a/tests/integration/test_ssl_cert_authentication/certs/client3-key.pem b/tests/integration/test_ssl_cert_authentication/certs/client3-key.pem index 9807809578f..f88456215fe 100644 --- 
a/tests/integration/test_ssl_cert_authentication/certs/client3-key.pem +++ b/tests/integration/test_ssl_cert_authentication/certs/client3-key.pem @@ -1,52 +1,52 @@ -----BEGIN PRIVATE KEY----- -MIIJRAIBADANBgkqhkiG9w0BAQEFAASCCS4wggkqAgEAAoICAQChyHUf0MGh5CJ+ -c93HPhzbTHx3CoFRs1Q06G4IFKVksFVMGtAzKtzk3DaxJ0+s2clK679Z9emspBj4 -0cyfkmV9aIbvF+BB7LAThmjV1B/wGMnzijCzBjfQFO3UMzSo7iRpGwV7HwrZkG0P -v04TIROF9mzcRFCy7nRhHyikNPS2WqlY4YXVQm7joceEjzAyQefCqNdB0LezIdQd -51EyVxYXq06+W3LGs0IQunuYuckpQZyQ57GvfgzM6KYlKjK6qm/OFrjCrM86o5WV -w8U0ZAF+5xhFNsIxgsfpqD2hixmbNn9VYwTkex6DRjABIZh0IVrCIAQrGZwx4AHN -9uxs9carI1Xfx668sCcmXzu2YcTHZhejB+3kUI32RntPjmo3P+xLRVl4O31nWnyd -/BxqDvIVY5bYSnwBcTOTVo70uyNOiEEcDBhgKT+W8d7hZlFIsbZfxS2urY0Sn82l -21/G9KfB1DG3sr0AGXpsPrwPp7ioq9w7TSK6an3l62gjFIhKSPHwkZtvUgb3xLA1 -wmcQY3vp5mvINu6aPLwmh0k/vLI8JzXoluMOjaUG6qpp786X9or5tVNrLth9RNBA -OfM61KCg9dwUMxY3HIMKHZcrrVH5NMWEC60TDqoILTIbIxQyLjXoJz4Ex7ew0sgA -IPR9EAV5a3fSHPzohTC7G927unnrvwIDAQABAoICAB52pRtXD0cBMr+V6MJuPzfK -GLu/picwud/2jlWGMbiafY1FlUO1Q//yOXg1O0sEfWNWreLuK9Ce27bqpnm7stGg -/5uA5vDy9RrQGeh9t3/Z4xkxQcdMGfFcJ4ZoF/fhU8jY1jjfWQcrq2WmM7jlZww4 -ITp+wKVYwmBRYjE9aYD25GGAoddM2Z2SZoPfBIfKIi5k5ZeWiii4a30wA/mTLW1K -jBrLFHs24O9OdhyFi0vx913PywUNGrLw8ewYnJHKqx/m5mZ97M2RZQbc5MLfO+rd -8BOEYp+5DRTB4c0L6MfxxJta+F5fkpjMfBiSb2caqsDYD4DgIym/EQfsAYvo4b4m -R42rfg5SEiLV16K3ePA2lEHWGmgzsY7PZzmJUGtytHd+NrHMBWloGp0Jke3LV30g -+3WnvXX+/MZ/dtH2/hy1qpZelZR32h1fchSnKqGUnSBcfMjP1YQY6FrTqNxnAxbx -ufLve41IN0+rCScZ/bp9FEd63DfdAi+U+RKSYjF5promG9ao0u85Yw8iF0maTbEn -oOS7759oGi6Y8udWuAXqmbo1JRw7ZIVKiIU09cR8/IgnGBgOO6/xSOrY+nUCzFvz -gVYqOSWpqBilrZF70GUy8iRfPWUAen5Zm58UvyYmByn0xV/aUKyKKcxzfGeQ1JH/ -NCdITbwdLyVhqiKuw9l5AoIBAQDTiXYWOjyxkXQMlYczi/w8pRaCvYMNJGzM2qmP -hn9ChIoi2STv4DuqYUmbW6irT/mtcaTA8EPwC10gpj0FWXtM9YYVhnoXl7rfEZUy -oYT9DFRGx25cR6krqg+sxqV22He8Ld618jn6MISrQS7Zd3rLx9goQXeTkLfD7Zdz -fwduSTMKBFp6zQdBlWf6PhO0vu0Cjhhb0Mr9pTYOYMmZhuIKHeuGU2kZT+Myxn0f -+mlkEBQ5bGr0h9Su+ROvU0vXMmr7SklKbLk1TZDPkduqfub40N63aapto3aVMjUg -ff1urZh5/wiAgKzuLcucLapAWCkdhaoLa5mDyug/yRLs8vRpAoIBAQDDyczSH5CO -deWarOTcREWeFc0Up4AD6EbVZBiMDuIMEtT2hMO3gLiF+/XvEptBO46zMp7LK1hY -E7IP1f87OiH7rOtQTVtsVD5UWIYQaikvCG8OOtOM77zb/OyNYx7mp1Ne2ZuoPKgm -pAhIhdFw9XYiEsdBTnI541htujhpnx+x8Yb8H7CpQ7WR29KPd2sytb0KqFWcEN49 -fYyC4DhI1Y86VzuU3T6SLdBqtq0FtA9QpXYvoRvfqoK0wMb/k7m7ptld65fIQ0On -9pnJCEk63msWSVxPbJzwfQOgEeP9f+Blg8S65kn2kb7DvwD+SAfhtjItb5RLbJpC -MD0eND8RtDnnAoIBAQCyn6SjaFHP7277IVilYlOAMjcr9zMuac+lvA7qLzxOwvSS -MKJMHCDF4pjxIxjub/35Q7lHYps7m5zr8PQeDE1d0wWCL6fxPCKEMqi1tEZEF6Ei -k0zRh4GGaXgLAUK/dFLHCETDhuMGLOoaYlRZNdbvNLWGaKG8bbt/KqV01ZNEwXLj -xoFQqMizzKGcYcbqHT6tpadUAJ8oR9W5lmklxwwgVtuG9fANe7PyMEGAO0e7QwiQ -5Mf77KbfiVvh9IhaEyzbPQNeYSiTpgfd/uBqu/X1rQoj/on3Qszjdx39beYPC29x -tLVNLVrZVLpWCwl0g/1T/IZ2VkmvTCuJkRT2GMHhAoIBAQCXaOZG0TY/lZk8ptxO -I1YbTQzOHkL3wpeUytXY6mGRqLac9ktNC/SnWRT2D9OU/PP8TPdUc27cnk6jxICf -UvOY6D8KWOiMvBF/tP3oj9DNmJ4ZyRB0+6l4Dv740GDDSz9EKNEhp9b6Gvx06Vfx -HUKAUzlYncvkTJbENLEPrYkbWYdkTzWVkNFHvVH2tQlnq/hH16ptCPZ1YzRYugGN -AerD2VYwZ3DOJzP4ctEXigoV+f+OEe+2Zuyx0CuU1q9aGUwcP+efbbVSnXiMaSYI -qpzyHUWP/pTXvjYhgfRoxI3Ks75eM12bm1aFlp60BwxhVz8yuAlc0t3wtdFuHEVx -8YidAoIBAQCQYRslpvkKLynGY18m4oViMjaCvt27fepngAuuVFEZ9cJZeWY8GMcc -IJeq9qTtlMuiWHnq8oGVCL6BnFX6BIMT0W1knCSRqHcwjQByZ51X2Bo60TktdPOD -c6lILIdkYCFLs7fXv0xUZihyIIdYaxx3XpeNwgaqM+wFsbSclF3U4cdwm5U+ltOZ -L/3w1rFlyH/+ZWIItBC8N9pHD4bayiavHT99E+35Vgtol5jqIhKsiGVgoj3tdDW1 -+xBdyrg6JLXHFP/vobY5mvLLGdcwCXEd/b+jUK9Uhdbq2VC27XEllIwGpPK1SnNU -7tLJO1z1/eDbntbQC4cvewqNRYhwlnWe +MIIJRAIBADANBgkqhkiG9w0BAQEFAASCCS4wggkqAgEAAoICAQD63ZNmGR0YXeET 
+32vPc9gMv0zm8ueOTeZ/UfpKUL8oZwm6+NWn8yFOQ6wV+8vHCnL6O8vQ5AnXde16 +p+KGJYkr1TSPAdp8JAc/OwVGE/KTVQ/kFHNyX9j+Fo+l0DOKXfTbWsO6zsiH9jqN +06ufEko212K6gGwmLl+HLP1Vi1sqvT2qQcMRD3kO+ehmUt176tSHwjSCrzS2EYBM +XLR85FFj3+lgEtJSsnQXAqG2L0jHxMz5GwZSIbg0XTJs7o4uBXnzg2OJSM752g+/ +iCIkQuPiGPcTYXR+rNFKG6UiKiwksTiTWMKoVELeoDQDtZnSYVIq1f1yJcRuZwhm +0Eu+8nqASyARC9be7aaN/wHRM5Efv8EtdXUqXJZUo/uwSWigYUylWa19YoDKlEm4 +kxdRLfT5wJEtY7VMtcNyWuxcj1nzPJ7fyur3zUbnYP8cGNImNURXQl7jMJD2Uq+F +vkK8hmj6ptEZaQqSx2QTsQQ6X9GXAN+7pZYeYY17J+QkcP9GWKWp6q5K6tnFWS52 +PfCKySNUH0IR5Q6lD+6a0eDppPAu3nVwakKhP9Jqi+wBIaaTsi3nSxQu1sFAXqZn +NWzR/h6NdQmXknU2XyIMAHA97+jIjAOZDE0XFuhwBzUt9PczSXg39xHrM0wVzkgq +jQQW5PqFvG8ZEy0XR0YHZS0+zeqsywIDAQABAoICAQDAMTs48CqTPXEvyW6OS+EM +uw7OrO/r3RCnIIYRo1UgPfh9byA5AJLWpA/V88eF4SJ/RYp7qglEMcvTuYVZYq55 +j2kp2rCphOysa6o5qxSf/X4kLerYiEf1OhGpZh3mdt8doqbrmnqVd3YarD0CrH+B +DnhMDBFPGx4CsNwRSqd40ezJYIJyspj7eUisA/Y9doaGz6ltKY/HoRba6fc4667T +RntEKIdL5f38lv6PViB7M/IZMrQf/kdijrgQLp9s8LMiddmvFsHDN2XzRfdqMnjm +AlxgU7xtRDc/gHh9+TNClSeT81+GmK92YeQXp2yGehr6SGFYr0iTkIomQpSVYK2p +0haIIjQMHlc7E6WVkDELdpAxERgvV4uDN9iEkd4t9oNDPPRioPJQ4bhbMSxCO+CP +NdFHTxIbaDr39OdgqNNE14j7WJsFaCsYXH2NFF8jvwIkPQ3QVMQT/JPGStkyF+9P +5IjFfQ9aEF2i4mAVYiG0DE3NyD/OOI9/uF05POn15H9U+bA9hfBE0Rtm9nMqfVy+ +zgmajXkVb0jTHdL2t/UKv0YdgaglvDcWGFdEUskjJoB00NJwBGorSvcMZiSTxpLD +cGRqywRHOEqNIAbKv0Dt2AX5ZdBSQu7/z1/5Jcdmx8vp9lVhQKeMzYxsFKE4V7fr +ztDuPOlFGyffxpRenBIxUQKCAQEA/XVyoOW1cSFqeG46mjw+dbwjqRmLtEVhAMsG +TtW8pnMJHZ8u7lfM/UJyMN4NQEPJElrABns6I3dqPOwaKOy1a2leHMg5Vvd0/uqp +s5a2fduP6l9PXvhhWDN2sChbeKhl0jJDVnaTO7tiye8ZGMYOM/AlfQX/+PY4QNgd +O7UwcLKhoytxtPtHFZTOZp+cECdTvlmX9lZoNEzFp0nfzFaLVwDsy0B9e6KGt1xJ +fV3Drw7p7PeUyYBNKkyCRVee5S/pn5fT7pkIxMHvaL9BBnWVpwiH3Vi0hfTfFZk4 +8tLcVZgf3n0Y4dMVP2VQRF+kKBTL0coLne36HksQEJyk/4KZEwKCAQEA/WF4z6kc +YXwsU5847+ywq4ipq9efadkMDaGzI6Ez06TQjRYNsZGplCV9fiGxKX2YmZyFzjTf +4joqOmI6UANk+JZKW0Eyyak/TnxugrjMFq8WnK64cIz1TK054tAM/bHGkavaYb8K +bCfbKmaSkwkTbb/OasbQqsC7jbALdbM6Ae0PMrpPmI90YYIMYLRogIaBqCkB43vp +GEZN2VeNS7blhRMiq7YBDXn807aSMQ0+skNSQ7MA8F5i4BFvWyPb1nKZWux1RWLZ +O23IxGWmoGho1CAaEk55LXbqLygU5ZYlBSqkrP9N/elJykOp0LwpjoYBgjMPmanz +o6jy8XIUP78MaQKCAQEAi8+YjqaHosMTDyGG1AN9VMaWSTYdOTC4JI7ZiO0f5hU4 +pw1i/viRy/Y2NTyXxKZfqO9EU47v8BZ0FO0MNRz1qi1yS6Aq+Q0BjYh2Wek9+0j9 +JwSyLKoIUHX694sbggAqQnuVZ4F7EAz6nnd0uZSuyvmiREfl/jgbqbFM1t3IvbHb +tb1GONYPTRlLjZJnrQV0jWCwkaLyUj8zHGeEuxvWOwT4mdmWHnf1pfmTVEM/qTYp +1Zxwh4JtjnKrvYJq1PPMBEvlDQ1/p8FuxbISNXTxOzVadL/0vJvp3ukpX9Du14xV +sA4DhrZAVzsUvtKfI7jtAWlZZSGbwdAYKYGvBn7M3wKCAQAHZWX6YcxTSCWfF0G5 +NyZ9C1Mwke20UEKaz0KEYrs5jVENHTyvFzpk+actHFyogmMG8NuzBjYWy23aIG3l +UgQLgY+QFFogKtGPP/CV3kEO1HOLhUoa9vJeF5xd84a9jQfnzqVkPwhV2d/6392d +byFjDbs/wKfspA2VeDMNb3rc/Yd5CpkyMdXK1tn3pKx8O/Di8Ld+ZWqLa9nv4y9b +q24NsV5MttZXB12K7IRd7C4NVAu9sCbx3T9znO6sMWLEYrn5Pne528XNh0nZ+cGg +YwvUTU+VgzbkTdlOIRRjEzvnZ7RA3H7xT3L49XqqfiOUZnL60vS8nopfF5pn09Wl +erUpAoIBAQDWHJQT+Jvj+dXPC42oIRQKCiYtp1buM8YyL+dJNi7p73brF+2Oqx3k +XNT5eP9GthGqpVGJ732FWJDbPViuZB12zlx9tpGF3ghQTq3p/95KOhGEb2fG7mnl +bEcPqOoFEsAlc4DZYqsDDUvmsifimKm20ZWi4VjTqQJUHYCegJsjrA7D4obGbOxX +FujRMq7/idXRjEoWLloQTMPAQ0Uu4Omwnea25daRzrrrJ34uYrTO1sNgOKk9JAem +rGgrOzsRVG1aNwddcZT/t/icLKS25G7AszLnrNFxJB7DRAfgzpHkJBwNLpcPVtfR +KB6GTGRi7uqGHYScU6+wRMHjdVzdKGNM -----END PRIVATE KEY----- diff --git a/tests/integration/test_ssl_cert_authentication/certs/client3-req.pem b/tests/integration/test_ssl_cert_authentication/certs/client3-req.pem index a2b19bf835b..7c679b4b367 100644 --- a/tests/integration/test_ssl_cert_authentication/certs/client3-req.pem +++ 
b/tests/integration/test_ssl_cert_authentication/certs/client3-req.pem @@ -1,27 +1,27 @@ -----BEGIN CERTIFICATE REQUEST----- MIIEnDCCAoQCAQAwVzELMAkGA1UEBhMCUlUxEzARBgNVBAgMClNvbWUtU3RhdGUx ITAfBgNVBAoMGEludGVybmV0IFdpZGdpdHMgUHR5IEx0ZDEQMA4GA1UEAwwHY2xp -ZW50MzCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAKHIdR/QwaHkIn5z -3cc+HNtMfHcKgVGzVDTobggUpWSwVUwa0DMq3OTcNrEnT6zZyUrrv1n16aykGPjR -zJ+SZX1ohu8X4EHssBOGaNXUH/AYyfOKMLMGN9AU7dQzNKjuJGkbBXsfCtmQbQ+/ -ThMhE4X2bNxEULLudGEfKKQ09LZaqVjhhdVCbuOhx4SPMDJB58Ko10HQt7Mh1B3n -UTJXFherTr5bcsazQhC6e5i5ySlBnJDnsa9+DMzopiUqMrqqb84WuMKszzqjlZXD -xTRkAX7nGEU2wjGCx+moPaGLGZs2f1VjBOR7HoNGMAEhmHQhWsIgBCsZnDHgAc32 -7Gz1xqsjVd/HrrywJyZfO7ZhxMdmF6MH7eRQjfZGe0+Oajc/7EtFWXg7fWdafJ38 -HGoO8hVjlthKfAFxM5NWjvS7I06IQRwMGGApP5bx3uFmUUixtl/FLa6tjRKfzaXb -X8b0p8HUMbeyvQAZemw+vA+nuKir3DtNIrpqfeXraCMUiEpI8fCRm29SBvfEsDXC -ZxBje+nma8g27po8vCaHST+8sjwnNeiW4w6NpQbqqmnvzpf2ivm1U2su2H1E0EA5 -8zrUoKD13BQzFjccgwodlyutUfk0xYQLrRMOqggtMhsjFDIuNegnPgTHt7DSyAAg -9H0QBXlrd9Ic/OiFMLsb3bu6eeu/AgMBAAGgADANBgkqhkiG9w0BAQsFAAOCAgEA -f8vcJzjwqm2yUx1gYOt/BzfZ4+VNvP5CSIBxRAkT4judk4Wz07Pb1dQT351HcU8X -+pJ55HfIgUFyOSR1gKGJSV1HsREqYyaJV2KeBQM+klEeagYR+0Dt8R7NYTRtqUmV -lyoW7eHlUsbvUa0jCiwOK/t7WDr9qH4ZUKxVYSpJNa8FO058SoUcCRFue5TnTxF8 -tHH+J+kzcagcS0Rk5CCFWCtNE8+0FdfUs8IUYaV8cw8PEqdfrfJ2f/Zj0I37rh9P -pjuqe+GGPp7hv29YJ4bRd5TSe05vol2g+LYx2JNe1sr+NnGZVDVolsTg50cEwBo9 -gLW0ea/4Y+OoOAqFOdVM+RvfEbgpsT0LpHZAKXfiGi1PAMzZ0bJcOH8F77mV7OcR -qNcshdM1LkMSojGvoVQrRP/Bz0CVjSpwBcmkGiehESkaxNNsUyQBla84v0GDvuL6 -cA6NDfl8iPz5W3kk+2fypgO7sw0FXQVKjq63gz4XAQsGP8JzF1cC4fDnoRRsHO5E -UdWE98/AnVZ7mQ5bC11TAuDyzKGh1FNjrYFmsvTnMIWo3Ef5Tc5GXfYC+fVryfDf -BAbw71FprzMMFoAIxiSCPzK6y/am7BdGM5IZN09V4BBMg8QwZiXtzXWH5JX5PKm2 -f15IkScIvUliS0RepLfI0CXcFuzpJKi7eHLqca0cli8= +ZW50MzCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAPrdk2YZHRhd4RPf +a89z2Ay/TOby545N5n9R+kpQvyhnCbr41afzIU5DrBX7y8cKcvo7y9DkCdd17Xqn +4oYliSvVNI8B2nwkBz87BUYT8pNVD+QUc3Jf2P4Wj6XQM4pd9Ntaw7rOyIf2Oo3T +q58SSjbXYrqAbCYuX4cs/VWLWyq9PapBwxEPeQ756GZS3Xvq1IfCNIKvNLYRgExc +tHzkUWPf6WAS0lKydBcCobYvSMfEzPkbBlIhuDRdMmzuji4FefODY4lIzvnaD7+I +IiRC4+IY9xNhdH6s0UobpSIqLCSxOJNYwqhUQt6gNAO1mdJhUirV/XIlxG5nCGbQ +S77yeoBLIBEL1t7tpo3/AdEzkR+/wS11dSpcllSj+7BJaKBhTKVZrX1igMqUSbiT +F1Et9PnAkS1jtUy1w3Ja7FyPWfM8nt/K6vfNRudg/xwY0iY1RFdCXuMwkPZSr4W+ +QryGaPqm0RlpCpLHZBOxBDpf0ZcA37ullh5hjXsn5CRw/0ZYpanqrkrq2cVZLnY9 +8IrJI1QfQhHlDqUP7prR4Omk8C7edXBqQqE/0mqL7AEhppOyLedLFC7WwUBepmc1 +bNH+Ho11CZeSdTZfIgwAcD3v6MiMA5kMTRcW6HAHNS309zNJeDf3EeszTBXOSCqN +BBbk+oW8bxkTLRdHRgdlLT7N6qzLAgMBAAGgADANBgkqhkiG9w0BAQsFAAOCAgEA +WLhSuFZ6pnoDe8LQx6eMPXzRkQb1qsJpyjpegUxFe71o2e23V/1yMnTfFiO+DsBQ +PP8RkLWUKAvkAvqPyttJBx9U5ZYspsSsTVhPsCjUFZ4IG+fc/dVP1ZRid5HQJz2+ +bFf4KPgErZkJZR02Q2q6ZpKq9clRzbDkho56OZXLYI/o2Z4xADbhzpa0xt8sx533 +bm0rKvz85WxH3cimRjKaGKzuKg38ZaXmmUbsigV3dzImT00KDWmMmaW9SB8lIm2R +JToms0Qs+mOr9qD2NiRoiUd1wmgG2QpFDViIqAZKJjjeesmeV2CAcPfLztOZBim4 +6bRIOIXDhYYOyDgs52XuijXUr4BR8aQmqBrjnccCMcGE8Ol5ZH/IDg4pCRSduCWe +T7ThhH7BpAWYdgF3ITcp5oEcpXK8IdAMAst1/6vk7Z1JHIOejxksbLsGDYkaLM6w +yTn4X3Ak0X6bVmLAY+xAL/WjAJhVtDPqGYAmpx4iQ6QjYG/8gRdOiUI8H7MCK8+h +P0auhyyMmO+kdhNnzwuX/eeLXZfNvnyK4n2uHWYgwV5I+Kv282zw94UIQgwVQ2DN +/IbXD7K57s7+ff9Eff8L/B8rt1i1cmv01mEgQ4kMsLOClGaceGcz/ivfzDCosmsk +Xg/zVmdunUY0lswYL4SQM3BhWB3xJ4likHikfQHklM4= -----END CERTIFICATE REQUEST----- diff --git a/tests/integration/test_ssl_cert_authentication/certs/client4-cert.pem b/tests/integration/test_ssl_cert_authentication/certs/client4-cert.pem new file mode 100644 index 00000000000..5eae58da627 --- /dev/null +++ 
b/tests/integration/test_ssl_cert_authentication/certs/client4-cert.pem @@ -0,0 +1,31 @@ +-----BEGIN CERTIFICATE----- +MIIFWjCCA0KgAwIBAgIUBfEMZ1Z/4weV13ryVA9qyNTPJHAwDQYJKoZIhvcNAQEL +BQAwUjELMAkGA1UEBhMCUlUxEzARBgNVBAgMClNvbWUtU3RhdGUxITAfBgNVBAoM +GEludGVybmV0IFdpZGdpdHMgUHR5IEx0ZDELMAkGA1UEAwwCY2EwHhcNMjQwNjI2 +MTAyNTA0WhcNMzQwNjI0MTAyNTA0WjBXMQswCQYDVQQGEwJSVTETMBEGA1UECAwK +U29tZS1TdGF0ZTEhMB8GA1UECgwYSW50ZXJuZXQgV2lkZ2l0cyBQdHkgTHRkMRAw +DgYDVQQDDAdjbGllbnQ0MIICIjANBgkqhkiG9w0BAQEFAAOCAg8AMIICCgKCAgEA +353z74lGXkwEc1n6r/M0FS1XTXhoVGMYIqK7HPCBOEGLeyyGwGfSQ7lRfT1xSkii +zBGG0Nod9cRT1CAewOSJ6BjVfkQcGEjlnVYm42nD6PMd9iFJj9Y5atPeFNvvr+wF +OFX+E8FRu8u9aEu7MIj+KCqoqBukFhFgJYX8sMbRROfLOPaCq0cSC+Vod4qR83+W +ITrQ5n8+/CC39uLY/oKgAKdVnmff595Uy76BVdYzuit1IRKwJxqIWMRrfNI+szmS +hdj0AHwgmwEGCaTNcOQyqvBLxW6qB5tc1FyV4LYv4iNftroqNQvlUbJ4UqVr55Fh +vZ38C1BQ4sWgo6FSS/B6u13clwpRzDh3H8tOMTTz1inUtg61Y49p2G8k3kNVH+QU +fRM4xvCkhFzIArgSiJ+/YUKboltSG5K28pegkk8RRMsaQK8g+NScKKu7/8ddRGE8 +454AqxPpzASij+djM0vxzgad6BB4e+iIVdj77NSjAxVAfg9GIjNHG1DZ87jLLgtk +SN2jaYsBRBRnmenslEGDtwO1SeWrzzicVfP9GRdiMLJwCkwv5P5hCzwrCB5eUPhm +tGHm4K8eXDAd+Ol9pKMySC79E5/W372wdbaO1fcAUKvpHhcRZnSusNAJkLGJYCkV +2gzTWlaeX4rGqjNVs4MSmNuMT+a0IafZeZivptxdgLkCAwEAAaMjMCEwHwYDVR0R +BBgwFoYUc3BpZmZlOi8vZm9vLmNvbS9iYXIwDQYJKoZIhvcNAQELBQADggIBAFox +3myVDr9yJkrF5+vB9gUlTv14JIPRd0OFCLcPOlHpvYKEwjRjTwT9oL3zU5PoRPX0 +AiD9sL5TOo0zraiFPUi1k5X6SoW/qU/kOJ/j5CgfChyyyit/V773LitM/cVXZGui +YX32V1zV9+RaCowC/16oHvfjMA8xNOYoYW83FgQ3GrKgRuqqVMT7JAHoDebVSqyb +w5W0G7RH3hHM1nCv51tnT1SZDn+qRBcX5faPUVARzdcRrZ/VSU2RoVIU/fPPiet8 +5TRioZFslZaFDWOLOuP0ZcOj5MsY3vQZtx2/NRgNc+iLF593YBUhRJYqfT5ePW3H +LwbZp/Rvd2kLucYd/W9WhKEzJKKvzm1V2hCDnh5dl32sZgdBzrdKzgyNB723cLR2 +cHFTIEj1Q/scay+iiSoV+VNfMSDQ71vkHqFHNhEqPFUpdF/SeooDFeQaDvYkomgr +Z9BJFtbp4kZRIEuPX+niTi0S/zwi7htiUn17wOIBcydcgG2GXBer5H3JyFnCXM1N +0jFQsuBFRj8xP71xzhN8YjA2Pe+MGYrMWiwaVMLTz8mdQ+Y2aEvOkfXFSaeNUqW3 +GYxAjEkhVCvzhOd6sD3QjLRX2qhwh8NJCJDbkTok66hno8QsHWASbaDiCMG9z7le +ci4dOHzu/buwqS4LVTmFWTn7mkd+FAlSY9Hj0WVI +-----END CERTIFICATE----- diff --git a/tests/integration/test_ssl_cert_authentication/certs/client4-ext.cnf b/tests/integration/test_ssl_cert_authentication/certs/client4-ext.cnf new file mode 100644 index 00000000000..f8425c275a1 --- /dev/null +++ b/tests/integration/test_ssl_cert_authentication/certs/client4-ext.cnf @@ -0,0 +1 @@ +subjectAltName=URI:spiffe://foo.com/bar diff --git a/tests/integration/test_ssl_cert_authentication/certs/client4-key.pem b/tests/integration/test_ssl_cert_authentication/certs/client4-key.pem new file mode 100644 index 00000000000..f1f17525a51 --- /dev/null +++ b/tests/integration/test_ssl_cert_authentication/certs/client4-key.pem @@ -0,0 +1,52 @@ +-----BEGIN PRIVATE KEY----- +MIIJRAIBADANBgkqhkiG9w0BAQEFAASCCS4wggkqAgEAAoICAQDfnfPviUZeTARz +Wfqv8zQVLVdNeGhUYxgiorsc8IE4QYt7LIbAZ9JDuVF9PXFKSKLMEYbQ2h31xFPU +IB7A5InoGNV+RBwYSOWdVibjacPo8x32IUmP1jlq094U2++v7AU4Vf4TwVG7y71o +S7swiP4oKqioG6QWEWAlhfywxtFE58s49oKrRxIL5Wh3ipHzf5YhOtDmfz78ILf2 +4tj+gqAAp1WeZ9/n3lTLvoFV1jO6K3UhErAnGohYxGt80j6zOZKF2PQAfCCbAQYJ +pM1w5DKq8EvFbqoHm1zUXJXgti/iI1+2uio1C+VRsnhSpWvnkWG9nfwLUFDixaCj +oVJL8Hq7XdyXClHMOHcfy04xNPPWKdS2DrVjj2nYbyTeQ1Uf5BR9EzjG8KSEXMgC +uBKIn79hQpuiW1Ibkrbyl6CSTxFEyxpAryD41Jwoq7v/x11EYTzjngCrE+nMBKKP +52MzS/HOBp3oEHh76IhV2Pvs1KMDFUB+D0YiM0cbUNnzuMsuC2RI3aNpiwFEFGeZ +6eyUQYO3A7VJ5avPOJxV8/0ZF2IwsnAKTC/k/mELPCsIHl5Q+Ga0Yebgrx5cMB34 +6X2kozJILv0Tn9bfvbB1to7V9wBQq+keFxFmdK6w0AmQsYlgKRXaDNNaVp5fisaq +M1WzgxKY24xP5rQhp9l5mK+m3F2AuQIDAQABAoICAAQfLTflF971F7/okK5dlUAu 
+rcVHyuSDTxaUWU6XQEqBKskCcRlq0H1fFRlx4Hy2Cgoo6ItA+fxlugXW8bosfD5C +9ux05O+tqE3WILFgabQJhyvaQTjdggFuFlHcG/bqKs53B0/l6FPF1Z/uhWzHmaez +4Zf3qnadq2AFsDqx73mNrDlIkfAGR1bgy6QocbhDSckjBGa7QbX0BHAQjl9imQBq +FTHuSDpF5to6kLe8UwfDdU0+wvB1lL3OIQ0T8wPqs8Cz1wuLPi6dPjc/SmoiSqzL +8RmaiJfLTVK8wiZ6NTe93y3HELAZoAh5ea5MTkjebSbJmrO6r0L+0Y8ykgnETP7O +Ug9PWeDDE15sNXIQCKtRe3QpHtJaoAJU1MGhNqwm9oKMcuSvBOV8XRuZORinTYRL +Q2ZD7czaT5VZXCQI4uHwE+KIlQF+658c9M9WETxClgUlhbzqig3ilUz3QUweaPvE +tqArjiYLsT2KtrgmsZ2DaDn2IlGSIRjXMZJ3kn6i49C0uhH2YkSZgU9/7kH2myse +3opxE1EbT4ARFWUbGgqXTOc/OSb9DAsxUK2u0eR/4yOMLQJkvxNJWQgwmi6N18iU +WdvTphNtMtmdsAhst9luwNaeJItzTDm7JeWx+MPs8f7PVOOkTz8HcBAvZnISH1Md +0i+0lBrBXbAcRK5X7tvhAoIBAQDwKPQA0uNk4Hemt5yke1jrg4B03OLimHeQ/1PY +I99hThh/RLncYaMxqsd5WkXXbjsWyGidKHYh3/cG9akmgU6D2Z16CFNMRhgBgRX2 ++LJkdS2QSuHPJlB9ERtOOiWFt7IDafB+tMKHE/VRQdxFRtvLe6pQMzP4veVXZsq8 +NNJGAQ8egUa6HDvkXzR2VDf2Kc61t4ZwT4JT6C12GnCfvXobaVkU1aWhcguoX8vI +o3UOkeracEKc/80ZRdFhA/nvPPXCobyjFjLi8WGp6PUySrVhN9eAdZaUBNeFHLdg +8urNvy5Q6mBAByEfHZeJNZbBeAEAw7S5YAxgL96blj2IPOerAoIBAQDuXazpCDXD +dG6XDZ9FS7MKBWriHUa54UgbX4JfQsOOFA8uxglIe5E4IKFSEbetlDORnKZjcmGa +SKTm0MLLi/kKrkhoDgi6HNbmbo9ZmKIhmEwws5L1TLeUdLrWj8GLgNphotOBKs1V +vQQkfh6rzovyFsMj44Xea8Kgx5ONVlB1L5pEepKdIyDRiQfxhwFox719HACSqCEa +06eFNGtUOLLqNMZhpur59bqgiVQtIZKis9juwzZID0svXBElpDrNvbWS1V5MCBOT +6AStW66YkmVWFCn7qQmNqMh4x19GveW8ajgrBSr/8GP/WXiACBDEsunWRORW57iS +KiPmC0uHlMUrAoIBAQCYTrCokRZLlJvtbIb4PY3gFw7xjmCJqn4xw+wNqHpzgI7C +r/hbjsRrrE5DZP/kJ3Fr+n92JAH/a8WDcWrsE5eSwQFBMmR5e/6ffZlLft/MHBBg +cU0SDc9/8chqbS/8xMotphMymDrCZeLvvKAQg2bDftM9d6ufNfdr3bH3eFxery9C +fmQ3hc5qAAMKhFDVWiBRWGn3ckVKJ3Ylb5E7jXQSTFaFgxU+9U/1YYOg5CFJszrJ +e+aTIRuWypOGPnpUwkluPRqgJ2TwTntMwYQ3d+/eDwcp3ek4SHXSYqrd3lERWQzr +niiakqrry92d1BGe8xdXv8Yuxn4yxkkcTUUK0O1vAoIBAQDLY0LW1BqL3B1A5m6w +QhdSxaydoz1l/cP5F1W20tDpulP6JSBmqIkQy0bbMCL6CSq3ZGLVGBQQAUwzZo3Q +AG9PncZKgy8PHux/UncejA5LfBgGtjL++6bpFXEXAzKyRhAQn065ODxcnBucx8CD ++ImQ17tKNClVz70SUzijsLKWSzfmlm/jhMXMBJCyle+t6EDXL72NZchZi5+1GTU7 +d+Wx0bY0PKji/7luob8hgzQLgEnp8MewVNxiXLyE0c0bIHR+BXGgjoOmAKN9CG3B +4ah1+l6YTXPJW+syo2u4gPA2BKxIiPBX0laA22bmV/t22vKL0dzECpSCo1JeR+T6 +mwZhAoIBAQDpLqRLxfZk2TK3wJ/bloXXsRg4TjSQ4m2Y3htVRiOQF83iERVRlAwg +9yKlyd99ux8tlYAK368Q+FFAwYTvUyghmfVTPARFTmeX0F5u+MX00WOa2FhPs3du ++ImYeQH3hg2O7qyVDMCwtqgIIuGLNwsVPqUUF5bx0He7wTwwzQmx3EVCOu6yZXG7 +Aw3qpOM2VhrtWgGP1mTONiUg5dh4sGbXX70gjG9cUpo/Owr69Q4Y8/OEyx9bzqSW +5BeVN0vONzQC+LHG5EvgNF6yOU7iCkuoDirZUrVchuAf+IDapK85TLIH8bm57LKN +Etg/x+MCoqlEQBVgnY7f3suMB89XerER +-----END PRIVATE KEY----- diff --git a/tests/integration/test_ssl_cert_authentication/certs/client4-req.pem b/tests/integration/test_ssl_cert_authentication/certs/client4-req.pem new file mode 100644 index 00000000000..224484f6611 --- /dev/null +++ b/tests/integration/test_ssl_cert_authentication/certs/client4-req.pem @@ -0,0 +1,27 @@ +-----BEGIN CERTIFICATE REQUEST----- +MIIEnDCCAoQCAQAwVzELMAkGA1UEBhMCUlUxEzARBgNVBAgMClNvbWUtU3RhdGUx +ITAfBgNVBAoMGEludGVybmV0IFdpZGdpdHMgUHR5IEx0ZDEQMA4GA1UEAwwHY2xp +ZW50NDCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAN+d8++JRl5MBHNZ ++q/zNBUtV014aFRjGCKiuxzwgThBi3sshsBn0kO5UX09cUpIoswRhtDaHfXEU9Qg +HsDkiegY1X5EHBhI5Z1WJuNpw+jzHfYhSY/WOWrT3hTb76/sBThV/hPBUbvLvWhL +uzCI/igqqKgbpBYRYCWF/LDG0UTnyzj2gqtHEgvlaHeKkfN/liE60OZ/Pvwgt/bi +2P6CoACnVZ5n3+feVMu+gVXWM7ordSESsCcaiFjEa3zSPrM5koXY9AB8IJsBBgmk +zXDkMqrwS8VuqgebXNRcleC2L+IjX7a6KjUL5VGyeFKla+eRYb2d/AtQUOLFoKOh +Ukvwertd3JcKUcw4dx/LTjE089Yp1LYOtWOPadhvJN5DVR/kFH0TOMbwpIRcyAK4 +Eoifv2FCm6JbUhuStvKXoJJPEUTLGkCvIPjUnCiru//HXURhPOOeAKsT6cwEoo/n 
+YzNL8c4GnegQeHvoiFXY++zUowMVQH4PRiIzRxtQ2fO4yy4LZEjdo2mLAUQUZ5np +7JRBg7cDtUnlq884nFXz/RkXYjCycApML+T+YQs8KwgeXlD4ZrRh5uCvHlwwHfjp +faSjMkgu/ROf1t+9sHW2jtX3AFCr6R4XEWZ0rrDQCZCxiWApFdoM01pWnl+Kxqoz +VbODEpjbjE/mtCGn2XmYr6bcXYC5AgMBAAGgADANBgkqhkiG9w0BAQsFAAOCAgEA +2JVul/xcJ+YlepOHxJ9dczIcXEjjMOBxWuyK+G9/6wASgHg9e2SB+WS1VSeUARC6 +VkID3Jlwr1gEw4gR3lW2h5I21kdCaBfCHshUoOr9rV5uE76r9kxgyEsMUZMvSClC +eQd8VK4fUT9JEKigIJeCFT9IE9PyxrdH1xpp89jOLy40t3PkubDi8WR8dvPckg3N +juLU/6EtbrtgFMnCqB2TmH4mc6YSCeENUTvt+nSiBKZUblGDuIxu/edX3SscS0Yv +qPM5LPcNHEGeeMC5ZSfotaSzRP+x3OlV9VJNROG4brbaI+3kECtegBgFvKIiK+JY +m7dkt8oIpQc8CKZkM8Kk6e3JXHzKf4vAiWHf0Wyag3gqCukxdas/PMx/3ROi7iDm +XQN713lxhIjtqfXQZjZcRmYQwdnkaSY+H7hgAyhnavqkBmPLeMU5hffdBswrjH+0 +fD0FOIDOWNM9e2Q/qdtHxtglNUmox0ETvl/3gYRkN1I56zNan6FNzGMubilntt2z +xXQwxP4Jn+RoAwb5U9mIlaLJ73FDbl6KAvFSJHlZl34R/o1nKOOAiFSv4V+RcTMd +x49P7cyAcW+eSsIgDzqabhx1OcrFEFRtBy342w5m5Qdq62TpFmeALgRYhAarA9UZ +YY/XOglN88K/3iR+A5LO7Hdv0Q/wShokghcSdAE3JOo= +-----END CERTIFICATE REQUEST----- diff --git a/tests/integration/test_ssl_cert_authentication/certs/generate_certs.sh b/tests/integration/test_ssl_cert_authentication/certs/generate_certs.sh index d6126d361f5..a09b7b2874e 100755 --- a/tests/integration/test_ssl_cert_authentication/certs/generate_certs.sh +++ b/tests/integration/test_ssl_cert_authentication/certs/generate_certs.sh @@ -13,11 +13,13 @@ openssl x509 -req -days 3650 -in server-req.pem -CA ca-cert.pem -CAkey ca-key.pe openssl req -newkey rsa:4096 -nodes -batch -keyout client1-key.pem -out client1-req.pem -subj "/C=RU/ST=Some-State/O=Internet Widgits Pty Ltd/CN=client1" openssl req -newkey rsa:4096 -nodes -batch -keyout client2-key.pem -out client2-req.pem -subj "/C=RU/ST=Some-State/O=Internet Widgits Pty Ltd/CN=client2" openssl req -newkey rsa:4096 -nodes -batch -keyout client3-key.pem -out client3-req.pem -subj "/C=RU/ST=Some-State/O=Internet Widgits Pty Ltd/CN=client3" +openssl req -newkey rsa:4096 -nodes -batch -keyout client4-key.pem -out client4-req.pem -subj "/C=RU/ST=Some-State/O=Internet Widgits Pty Ltd/CN=client4" # 5. Use CA's private key to sign client's CSR and get back the signed certificate openssl x509 -req -days 3650 -in client1-req.pem -CA ca-cert.pem -CAkey ca-key.pem -CAcreateserial -out client1-cert.pem openssl x509 -req -days 3650 -in client2-req.pem -CA ca-cert.pem -CAkey ca-key.pem -CAcreateserial -out client2-cert.pem openssl x509 -req -days 3650 -in client3-req.pem -CA ca-cert.pem -CAkey ca-key.pem -CAcreateserial -out client3-cert.pem +openssl x509 -req -days 3650 -in client4-req.pem -CA ca-cert.pem -CAkey ca-key.pem -CAcreateserial -extfile client4-ext.cnf -out client4-cert.pem # 6. 
Generate one more self-signed certificate and private key for using as wrong certificate (because it's not signed by CA) openssl req -newkey rsa:4096 -x509 -days 3650 -nodes -batch -keyout wrong-key.pem -out wrong-cert.pem -subj "/C=RU/ST=Some-State/O=Internet Widgits Pty Ltd/CN=client" diff --git a/tests/integration/test_ssl_cert_authentication/certs/server-cert.pem b/tests/integration/test_ssl_cert_authentication/certs/server-cert.pem index 53ee3185b2a..073c6485bd2 100644 --- a/tests/integration/test_ssl_cert_authentication/certs/server-cert.pem +++ b/tests/integration/test_ssl_cert_authentication/certs/server-cert.pem @@ -1,33 +1,31 @@ -----BEGIN CERTIFICATE----- -MIIFpTCCA42gAwIBAgIUf7oSjl262zqxycxCt9R08BG75GYwDQYJKoZIhvcNAQEL +MIIFZTCCA02gAwIBAgIUBfEMZ1Z/4weV13ryVA9qyNTPJGwwDQYJKoZIhvcNAQEL BQAwUjELMAkGA1UEBhMCUlUxEzARBgNVBAgMClNvbWUtU3RhdGUxITAfBgNVBAoM -GEludGVybmV0IFdpZGdpdHMgUHR5IEx0ZDELMAkGA1UEAwwCY2EwHhcNMjIwODA4 -MTcwNTQyWhcNMzIwODA1MTcwNTQyWjBWMQswCQYDVQQGEwJSVTETMBEGA1UECAwK +GEludGVybmV0IFdpZGdpdHMgUHR5IEx0ZDELMAkGA1UEAwwCY2EwHhcNMjQwNjI2 +MTAyNTAxWhcNMzQwNjI0MTAyNTAxWjBWMQswCQYDVQQGEwJSVTETMBEGA1UECAwK U29tZS1TdGF0ZTEhMB8GA1UECgwYSW50ZXJuZXQgV2lkZ2l0cyBQdHkgTHRkMQ8w -DQYDVQQDDAZzZXJ2ZXIwggIiMA0GCSqGSIb3DQEBAQUAA4ICDwAwggIKAoICAQC+ -6f15rjbEl5DanEWi3YsMIPNwQ08waUrqFz3aCKeGcRujKb7uaX/I5LWdh8x9I++E -E2ccFlnJTd2dZKxjY9jd2pRXVHmVE7XLpl8qtlLtLjlJ889G3tbgwdLZqaClxJxH -0KQHH5wUwweqlfbteXeYUAIfhDRmoOL2qhUPLYi7E5/vpMeEL+tMn6fswuk7JIR5 -64NLUVC1/VXtjDli0YiIhE025iWL7FukUAifJKa/kYEAPen71bZcpT3uxYnALClf -rANg8uJ+DTDPeNjZbLjNTPRfqk4W7pwG8w5F6RhrMPwgqvBNrnne7OtCD2lzlFU/ -sQiBM8HA5gN2CJuDjl+F70KTY3nRkAxpmHvSl6RPTEx4egdcb69A85LmDjyV+S01 -8tJQgi8TEmXM+TadQo3Xz/6+MHBIr7MknRRs0l2wCCiNvkE5MxsT+Pv182wxGArF -aIvCU4dZI6bI2WZbYclR41Ud5O6XhQKYu2AhS7zY1+nhAMAvqyD5OmIIjKWi3GHP -vdSCljBQzs1tuHgTaKIeTTzFetnRDzyoRDbLbo/OhKEAybvIEqq5iXUmfz4RG+Am -ZUdxBZ6muueAf44mVQBKx/OB1BIzObaKy5s2gP+c486WR15lqVcu6FkPf8tO15c2 -lfpDXE5IvRBeSgi4vVdL4ceJtrfMavd9pa3YmGk+zwIDAQABo28wbTArBgNVHREE -JDAigiBpbnRlZ3JhdGlvbi10ZXN0cy5jbGlja2hvdXNlLmNvbTAdBgNVHQ4EFgQU -eebevYkmCqTDsOG16U3lTCIvqNcwHwYDVR0jBBgwFoAUT/2qvvJBk3zEQJj4KzWm -hB5HShMwDQYJKoZIhvcNAQELBQADggIBALVfgo+K+SHzNrerNVJI8U/50fi3WPsS -rsorNxYHaapTJB7/Ngn8Nv05YI1Er4Npl9X/9LjA0uwxYmW6zPGGoSVqGTXJD50o -2c2MVXrx3ZjkNLNw8OoIROU/JwFZRANYS9ECZVOYY2eHcci4S1D7izZP/7+8V+YV -l49Do+ht1nkpeOadWXsQDPZO3bVbUvkvuYYeaHGzULKWS4sHbXmFizrL3V4XeuyR -SAwAo1sRkYcJP5JUk8JCDW+5XFtk/X638RfZJ/9hGxW2gbX9T/Mgqsmi4TMajeNn -VWkq7+WmU4v9TKZARA0240CIiwmW95KVMYe5rWEB3i9yo0c4kei9H113q0Pp/3Kd -sNCZf14Wm8BhM8uUQTyOUyXQvDUx1JzJyZtXj3zGR86uqEGPPMJj4tWeP/FIcF3v -hpH3s0md010BIjpEVoov6q1qPe32WQL9eGappsiEbJKFpJ4YYXwtSgOnUy6vt0kp -TTuYQWBCUfdhKUmQHvJbFzA6OlIs4RRmWlUcFOFOcS79FzgcWS481LSyMNPNcQNQ -PGmLtxxcoRADs9++BltF+Q8V4MbH5o+ZQt8314Vg9n1AWV6L2poLNY5CxXMd6tVu -wDYHIe/VHyCBqsdh9u7XKpv27xfu4TdxDS1nNzyMm69FUlGvRC5IR2k+IEIKmQ2n -nyRbOKxUBKl6 +DQYDVQQDDAZzZXJ2ZXIwggIiMA0GCSqGSIb3DQEBAQUAA4ICDwAwggIKAoICAQDd +l4bW8vca9/iFDASgmGxyCQvEK5roWwPzE/DXdS+mMJM/fffyqB7Cldm1KW2ILDYU +6JGnlYdxvWLOEa7n4cIBrQ2za7vfDU4LzGJTBN/9C2TYGFP13FTfZi2gZOuipT+M +pkFCfJG1zMWR13yKNxiS6Ixf76c6ADUyt7GmRcuKSctKzM7hUARfeko5iBN8aHpD +nxYa6xSGlHXHDzHHEqEpsk+iQ/eK6o3EeElg7v74sZSzqleIkEfFLiRIPNvuAwEE +IRIQg+k/2S673kdANlyfszXXbUGCYU9lOOPYgAEFgb5ljSXZJBTW3uFlyIONK5I4 +EHgELy9Hs0qnqttXCx8d5I/eI/+Zx9GPF3x8gkMi5wQePiBpqde20kRYDvP2eGeZ +eFTnKYpqCshVAJ4+1Kg96f4S9+GZ1wfzsgS1lA7+9qr3qTvI+XnKhC2h3bqyWS+C +BQikgADbURgZT4EzhXvq7fSCFHFTA6xZzXNEVO7DkRrYbvkq06bIl1Ov9vcbCN4I +zOuJJMxlk2Dv3C3mUox3HmcOdO+vvtWnZt3HbFY7ZPCV68ObSf77YD3O5on5RFQu 
+hk+AinrsDIL6NiiVzALXBL+e8flkqZDzRk1mGphVXGcRP6nn4VtrfN+Bmbm8pu3m +6aYuqSX6vQXb7EHW1tAbvlfbxIlP/Hp5GoV8zqI/tQIDAQABoy8wLTArBgNVHREE +JDAigiBpbnRlZ3JhdGlvbi10ZXN0cy5jbGlja2hvdXNlLmNvbTANBgkqhkiG9w0B +AQsFAAOCAgEANvZ7QDkHIKRq/g4GPkuiU7DN44/TW4bOFe7rDC5S4z5sh/i/Tvur +JYW7m97vLui5PJf6Vbd7xyl5MFIiz2KzoLi26rlvYcI/BT8mIG8jMg7pJjp/wJGa +QdCxdO99a4SIwg7x8pvWChFAOij5e6RhrIvsEB0LN5kKRSnQ0sW5khfsdFbn+3Iy +VwyvvE+nsCqE+KK358EMHicn8FVD3Ze+YzckX0am9DbshL5+eVQ9nOhUV2B8PcbG +SGzqJF07wOBwCdcn3eY+V98SQqrpGC9tCXmv8qErfkq7pkUGWq15d+miF/gaUz+Y +yDPwgi1pephBJ34IhLUUk0IPZJ23uVv/zfB+SpZ9/5pjsmnapR3Zf725jWrhjeT8 +44i5kNeVCvZPzQO9cTOsLXJbWb0vqRzKsvuSvffDQZql4bMMvhPjMibqCiRuSHO/ +yPlWiJjhkZz52DPJX6+LOeP2pFfUe2TR6IqcFPfUs/bV6aD2L/s5UZfZXWS5i5FR +I8uvcKOWL7NBbdY+NVE5aT7DqfhaRurjp61Aym18FgXLHpDHYV9IpkU34+A1MBUi +bzHZRWhxZMRxYezC7jE4zsZ5CQtSq1miDPcDaeK5vMd/Vdys5MIekqCfKUh+Cd5Q +gfC2QgNgodcWRF2plNgA+3E0dUULR3+1s83gGWGC8/UFW+9dtYV4nv8= -----END CERTIFICATE----- diff --git a/tests/integration/test_ssl_cert_authentication/certs/server-key.pem b/tests/integration/test_ssl_cert_authentication/certs/server-key.pem index 9d4aa59a125..067adf4e1fc 100644 --- a/tests/integration/test_ssl_cert_authentication/certs/server-key.pem +++ b/tests/integration/test_ssl_cert_authentication/certs/server-key.pem @@ -1,52 +1,52 @@ -----BEGIN PRIVATE KEY----- -MIIJQQIBADANBgkqhkiG9w0BAQEFAASCCSswggknAgEAAoICAQC+6f15rjbEl5Da -nEWi3YsMIPNwQ08waUrqFz3aCKeGcRujKb7uaX/I5LWdh8x9I++EE2ccFlnJTd2d -ZKxjY9jd2pRXVHmVE7XLpl8qtlLtLjlJ889G3tbgwdLZqaClxJxH0KQHH5wUwweq -lfbteXeYUAIfhDRmoOL2qhUPLYi7E5/vpMeEL+tMn6fswuk7JIR564NLUVC1/VXt -jDli0YiIhE025iWL7FukUAifJKa/kYEAPen71bZcpT3uxYnALClfrANg8uJ+DTDP -eNjZbLjNTPRfqk4W7pwG8w5F6RhrMPwgqvBNrnne7OtCD2lzlFU/sQiBM8HA5gN2 -CJuDjl+F70KTY3nRkAxpmHvSl6RPTEx4egdcb69A85LmDjyV+S018tJQgi8TEmXM -+TadQo3Xz/6+MHBIr7MknRRs0l2wCCiNvkE5MxsT+Pv182wxGArFaIvCU4dZI6bI -2WZbYclR41Ud5O6XhQKYu2AhS7zY1+nhAMAvqyD5OmIIjKWi3GHPvdSCljBQzs1t -uHgTaKIeTTzFetnRDzyoRDbLbo/OhKEAybvIEqq5iXUmfz4RG+AmZUdxBZ6muueA -f44mVQBKx/OB1BIzObaKy5s2gP+c486WR15lqVcu6FkPf8tO15c2lfpDXE5IvRBe -Sgi4vVdL4ceJtrfMavd9pa3YmGk+zwIDAQABAoICAFAqklEHmiDYIi/iAg2/KLWp -mn+QDA8gj6AXrVXCX5JBKaFMlMupCksR2r9w+CmuLtPPj/MH4aD5rO78CLVdhoG2 -WKcJJlq2oZaH4JtfXxgQWmcW1XmFdkZ/rSnQJFCnbBZt4orJN7GyKaR0f3E9mb4g -DpwsWBKmSVfZmKk8bhdcSMMI9uyncI9G1W1CdUxr66MEhafZV+JrpCrxQGGh6cql -f4TnhGmqkNrA7rXg3pI/p6Mx3HBuz7o8evKqCUtkX+U4Jl0N8JSMtmvQa4J1OG6g -+1a2fT786BC0/E/X7vSH579R+EEuXCeDZdBiB15MsbVigdc7JAd7roVgXOkTnkE1 -miQeGhP2J4b+OH8fTLS2KfZ9rW/uUFI3kO3duMv6+K14fIzXDZMGduW9f+Tf0gqf -bj5A8Me93fddU5UHgLcJPwqbKXwJjvpnc4c0Ntl5op/2NieakvSJ9l0SnqzGcLx/ -Ufgiz4djaRX/xd1qepOHYkXT0egVec5kJvY5uKgdkWMH5ZlL5viv+7dZNBANAzpl -3K6j5N/ay7ED5cifeUReuPeNw92w5Rvq7OVGWR6lEsexFf+J19fefPo43PQQFtLM -W4T2G/Y0NHkkUsY71CMFN/Oom9wNLq2EGq0apljphf3pAVf5aGNSNwJkLBlmwJDu -NBN2AZQDd99zJ5+LfkotAoIBAQDK2koIG46VTDmhLAOZFNm9wMBALjFExTwo7ds4 -J2GmAphAgjkaxq4KFePc9uzVBx3nkP/4QSWpq/IhJP1usFseSeAR4SCRx2ARvhvO -T+QQ2TTWIfH+LfVM+Vkg38eXJrAF6+UT2EFkvFg0gO6bW6q/SSCxZvltLKn2GX9m -ql3SeFq14r3q05D/sAetdJquAyY4mDZNUEh3rK9ueVNODXSw61GvCABEAJvrNVeD -4iTOmL+51hQaNubuEyxvmmMVv6ougo1MwQmC2MdnwlW5DmUDM4oHjiEZ/1bumEMK -HMUgAYHguwRug28YU4D7MMkKWnhCIa9tEeUKlCDi5eiqtoAzAoIBAQDw7vO61oAd -Aw1lS5UBKEovZfM3aDZKhFsqkhbRBB+FoYtWJyEm6OTZL7oJOVPS/BZFzI/Iy7XG -D+4aeTrK0iQsah1oim4VHp+A6jBOm8VuMJ1vJE/awXXqAkxPnoEaDPJI0DNyVJwx -ah0wH2qAMVi4eXvBHaYemx+KaVxYiXp6veupwWS6ZIQYxKsXbudbs0u8tKEblCGm -KFbeTN8lI2Dm0YwLsrYL4HU88AYYzW1xJAaeN7IV2okOdu8bxPZn33MrQT/Ag58V -FI08yWqxtxQ+2Xf+rpT9KV8GzrCkZ23aSxT3MKjhx4zg3IGEwZYeFmglDgxj4Xeu 
-WBanAMmddzr1AoIBABqRzPCS63ISsdmyciYy3PJFtOizJDDlxKN3xCbuwZOE83w/ -kks3isQ3ackfbpXYgMo768sQfWZj5ysANVGyN70X4Al3e1Sc0LCCPhIf1LeAO7Nw -bsnkKyUR4+KybOoXXybnZvHaeXZAVS1LVfVzZEH5yhZybmkmWHyrikxgNDnFGdyI -/mcrnupenCLUCw6PywnOpg4qXdOPAsttiMPIb2hc6i4K1j4R9lowem4DyN5mk3Y7 -3BYy6rx0NnXHuwK2QAwnfWp4Pk6tJjEo+yFqCUEKFyI2M4+8Kh7GQsGoUwmGKQTO -eh4fiQWeql48XZjFkMrYrqmSlnV0QXp2sqrpjJECggEANomwjEOP3oFZX+ubABcR -q+cFBi7F01pglKNbHNc1F3e45biXzYn8e5lNIFdkvSapGAW1KnvKWtoySLXWdCDb -ZV5j580mHAvBiVn5s2GZcFb63DS5CsiuG1mH0qILkU0K7yaJ7sBuVtUxZDpITlpd -Pezp8Y33k4gDvL4a1EPSgMRK+zM2zOaB7GVgYT6OinhslXvB2E9Qyp7pAwsdm/eF -MNqQO/mpMckOYMvoZWbi9jB0ew4ads3wJmEPwRZ1vI+dL1ZmyvpAYXI0gmUJjM7n -e06Y1gLI4QGjbBQPcjejaz2Bsm7GW81i+2eOvfFgPNFPo8upTFa2U7XG8ui/urBJ -nQKCAQBq2XWZJFgN9GLRWy7Yw+n0s6W5vyD/c8O9VTuTn1DafOVb5KxH+egcxDcj -0T39nyVm6XhchL96yu947bWgCVOiR2Yu5QQ6xw7em46VZdbTtwjKlW/xRYGJKW8v -utcd9C3kR1r92oLbtuWcoBFcCW2MMhamU8H+Wg6Mj5v9+6FhWcomqHaiS1E9ZA4V -qhPCheE6XZGlo6+ar9gIXJcGP2ktkStAWT+0O8stc8xvF5nS/y35ZJpIxrMOsNQS -9+kxj8ouH6D49LYm2ZFtgV5Ink32zynAzFUm4FcYe0fwo6ADzS/PH5eSL1fX3iZP -Hg4+vB3JZpIQ4jvvlZ+GdcG5eW61 +MIIJQwIBADANBgkqhkiG9w0BAQEFAASCCS0wggkpAgEAAoICAQDdl4bW8vca9/iF +DASgmGxyCQvEK5roWwPzE/DXdS+mMJM/fffyqB7Cldm1KW2ILDYU6JGnlYdxvWLO +Ea7n4cIBrQ2za7vfDU4LzGJTBN/9C2TYGFP13FTfZi2gZOuipT+MpkFCfJG1zMWR +13yKNxiS6Ixf76c6ADUyt7GmRcuKSctKzM7hUARfeko5iBN8aHpDnxYa6xSGlHXH +DzHHEqEpsk+iQ/eK6o3EeElg7v74sZSzqleIkEfFLiRIPNvuAwEEIRIQg+k/2S67 +3kdANlyfszXXbUGCYU9lOOPYgAEFgb5ljSXZJBTW3uFlyIONK5I4EHgELy9Hs0qn +qttXCx8d5I/eI/+Zx9GPF3x8gkMi5wQePiBpqde20kRYDvP2eGeZeFTnKYpqCshV +AJ4+1Kg96f4S9+GZ1wfzsgS1lA7+9qr3qTvI+XnKhC2h3bqyWS+CBQikgADbURgZ +T4EzhXvq7fSCFHFTA6xZzXNEVO7DkRrYbvkq06bIl1Ov9vcbCN4IzOuJJMxlk2Dv +3C3mUox3HmcOdO+vvtWnZt3HbFY7ZPCV68ObSf77YD3O5on5RFQuhk+AinrsDIL6 +NiiVzALXBL+e8flkqZDzRk1mGphVXGcRP6nn4VtrfN+Bmbm8pu3m6aYuqSX6vQXb +7EHW1tAbvlfbxIlP/Hp5GoV8zqI/tQIDAQABAoICAQDaRKlTDRwN+ndXRlFAhyM6 +6GIopvL9MLmhM+EluY5n2q0P+1rCMIusC8LYSahUW4gh7DucoRM7G9s5M/3e9mcN +E5LNSq9RtF9OC9JGCCVBsXlxyfTZ1l/bdWA3/3CDUtZYCmN5xA4az0tErsdDtaWE +/39V+E/2N8Iu5PYd293zp2CRm0+kbBcCnQiDxt+6yYa1GPzDIw+iyJWCsBrOBjGt +SrBaGyy4LvXZsspEquWHvhPFLWLvZ37qYNroNNpFhbv4f0K19dlJRPpdn0L7oxB1 +VicQvdOrQ4LbJ8B2vw9Ch1wt12ySiJHmXMAUa//4jBSJGN++72NY8uf0Y72N7ayF +Bck5QE0we4i7hhuN0WL+IftYD/O5NgOnprjMWifLOhQ8OECZfhOKgbRU+i3aJl8D ++raQBW7/GM0uL7xIoMcEZSwMs/sQR4loNCJ0UsIeWTdWNrhXrEuzDQGXoWcB8K/r +VVayDO5Qqx8R77HB82/pRdqEWNaNQd7DhPgnWiixISpIGm6zMvT3S0hzEkxu7FNb +uciq9i82BrBkKhg1kiF70FqG13VeMFqTJUuqtoRs1QEQgumvWB47n6FiVwHdDbbH +sUeKZYwbrY22Cn4rrfXH+0KKM9TDR0CiCv+CkSGmG57l5tW7aSUWun46qP8vh7sc +ztzb4LzyUt6XEBIWIqBIQQKCAQEA9+f4TyGo88qzTKaQoko5OAYhvAGr1UGAS6qh +sJpd7rHd9g6DnXyOInpQNglToiJ94mNdIK8f/oOjUXZh2E4CWuxeK291BNiaqCxe +s3LS3XjkdHQIHvqJUw/r4YJ+zfoGznthNbDwDkBob9x3h9rknTbGdLcgaTGi/0PZ +cFnyWDPNPskbcZ3Dxr41oDHiVsOx8n4d4HtspXzh+EPiQiJz5HVfwGNEyzhmFWIa +EzQaxnHL+WF1Pqe1wrzOwZia0Jss8lxbHcTnJupaV5WDvbxY0E4ynofShicv1U76 +B41xDKP/8hFWck9LiMDXk9vrbQIHvGAcsYr5N/jzIrDiEXxvGQKCAQEA5NOegb6m +Ak0mXg+XKhZnSdR1KaWT4/hbVWqYjwxAeAJfMaxjD4MXA8qeEAJo3l3ETkdFCHp/ +lr/BZzNYXnmHU6uvDn2Xq8gPO04ruSV0WWthikXb5gszXdkUH+6fryHn6L0kYJQH +NARQzOgdEcmTP7sy/5GDKzydWbT5qRNOnESUWgnJi9ePjGB9zWxn4jx9AfOYtozh +UmEgofSDGbFlamQic8HGnSJFgOxIZ0AfurPIRSR51gvXx2D5TcsPjLlDrY07IcF3 +DjqfJl0gC1XN5BXdpPvjvNrns+ZK/SRoGlgb8Q4tZLQevox9W110amvMeZj4yMTK +9mgGOSYCzZ6U/QKCAQEA1mBZ4Qwpj1DNRk6PqlfnLSRYTb1gO9UdvdE7a33CFuTn +HZ2lgS2xt+zvqhrcoMuU8o2cfeQTFcP+Gjb2G9gxvzDBqmwC1IL/Esjzx9hWssCV +RoMEds2OrS6Ke4OeZj59XldhU83DeX+HEJylHO1UXwN8EHg/5dfPrVCeGsMdh9qb +9VxxiAm2wAnCU9pvcTpfimQ3L+VrqZvZyRfi8+/ZKkm52KO/XMFTvdAM3mhjcxH7 
+Ipd9jQX4bwNZBB8UWaqm7pqhDJg2j/d+0lhwCUZzwwasTV0E14/RlHNsUdWlWhoD +/e+yQr2BgyvIAIvgBW8JA4RVq86S/y0gC/LMO/TQGQKCAQBB2rlaY7DJJsTs+xWp +EiuFrvRNGQ734+j9KyFewcrn/t7An/keZL7B45UbzGW74UZ2tMIkT4TasLMLbVZ4 +UgdlSBqoU/LLiFcB3Vxt+16BwYqfzb0cdorA7pGBIx6nu11PuOd4OAHesYNDhWWg +Ud/jzo89x/X1AovSXmgfhaPxCzeatghgC5iPcNGjxhgbnwbnAeEoYGEUYUmP8pus +UEZ8mPblU5ZCcLOKB/ZKaMT46Xawl2/M7zmZcsos3kzKViMpFmU3MMN/v9U/qDtp +p7cKdlSEf82p82INfzCDq++d7U+VT1w3CDN06V/GZJ31ZrLBKAopVaGHyqZH2i2i +WYpNAoIBACmfr9BoJh1/mbLdd/WpOyORKTbnwstlMgoUcARRJn86iPZuyI4QoSSb +TePZqrWRVmO/K5M65hFjhUpqTWKJGJy5LYIZ4yuIbonJAPNUhjA0bkar9cULBFzy +rb0xmW6sRlBnqhv4aDlOkhHkkR9lB9rTIUW+ankuvVBiGWo4eE8DvZYo30frltku +2K/kqd3NppTl7dN4EnGTo8ROZvr3EMwSu6nE+wUr4G7YuCLdPxwb8gAB8dbmaUsn +AXocUh96kYqTwRxo8FO9SqgQYMf81/ovPUfv+7mwO40oygzy/YkGmB1shFIbQuzU +lJvRfdXyyC9DbllQkxWfdvaanLS3r1w= -----END PRIVATE KEY----- diff --git a/tests/integration/test_ssl_cert_authentication/certs/server-req.pem b/tests/integration/test_ssl_cert_authentication/certs/server-req.pem index 714ef19ecf9..bd8e2e1fb7f 100644 --- a/tests/integration/test_ssl_cert_authentication/certs/server-req.pem +++ b/tests/integration/test_ssl_cert_authentication/certs/server-req.pem @@ -1,27 +1,27 @@ -----BEGIN CERTIFICATE REQUEST----- MIIEmzCCAoMCAQAwVjELMAkGA1UEBhMCUlUxEzARBgNVBAgMClNvbWUtU3RhdGUx ITAfBgNVBAoMGEludGVybmV0IFdpZGdpdHMgUHR5IEx0ZDEPMA0GA1UEAwwGc2Vy -dmVyMIICIjANBgkqhkiG9w0BAQEFAAOCAg8AMIICCgKCAgEAvun9ea42xJeQ2pxF -ot2LDCDzcENPMGlK6hc92ginhnEboym+7ml/yOS1nYfMfSPvhBNnHBZZyU3dnWSs -Y2PY3dqUV1R5lRO1y6ZfKrZS7S45SfPPRt7W4MHS2amgpcScR9CkBx+cFMMHqpX2 -7Xl3mFACH4Q0ZqDi9qoVDy2IuxOf76THhC/rTJ+n7MLpOySEeeuDS1FQtf1V7Yw5 -YtGIiIRNNuYli+xbpFAInySmv5GBAD3p+9W2XKU97sWJwCwpX6wDYPLifg0wz3jY -2Wy4zUz0X6pOFu6cBvMORekYazD8IKrwTa553uzrQg9pc5RVP7EIgTPBwOYDdgib -g45fhe9Ck2N50ZAMaZh70pekT0xMeHoHXG+vQPOS5g48lfktNfLSUIIvExJlzPk2 -nUKN18/+vjBwSK+zJJ0UbNJdsAgojb5BOTMbE/j79fNsMRgKxWiLwlOHWSOmyNlm -W2HJUeNVHeTul4UCmLtgIUu82Nfp4QDAL6sg+TpiCIylotxhz73UgpYwUM7Nbbh4 -E2iiHk08xXrZ0Q88qEQ2y26PzoShAMm7yBKquYl1Jn8+ERvgJmVHcQWeprrngH+O -JlUASsfzgdQSMzm2isubNoD/nOPOlkdeZalXLuhZD3/LTteXNpX6Q1xOSL0QXkoI -uL1XS+HHiba3zGr3faWt2JhpPs8CAwEAAaAAMA0GCSqGSIb3DQEBCwUAA4ICAQAx -Sq/dJHjSVa1g4clFjtdKciFsSnCm/vgzGInyxGL4zTyTf4QXQ0PhfHkiFFCMkFSP -snxavti5HjSCJlUkhB/x4YpqFPQ+/9Uly8RCKKdlMTSiJ30IL/D4dWtmwA83UQAY -ZI6b6dvjdhBNMDb5M9Qzv4+PmF/KMB3KlFTQtDZoAqAnWrtahsVJzsaawK4PPc/e -4IINu2O/aAFnJt+ewwA1NDrkaSlD7Wgu+SAlQRPO+vAKS6Qbs69R/vDdVECJOTmB -FJ9uQlXuhwsR6u5Pl0Df3Jh4K+EXw0nY4LEko3915HnKAQt0F4BTrHjW3Sk2WnMN -AWtp+4D5epRvD5VpL+mwce0PLH6rUb4Ipe9zmApGQr2GAO3XjpfvusvUJPFcWe2b -EfnBxq/Asw1ALqLrT/LKpZHRvNN2YpBLl8ZrzOsNwqVPMDTPUYWf17wLS+FiuCHD -BTdMIoqZ0dmp1ZmENB8h5zM8W+XMlVQlg+LeTVqeEA5Jgr7zuMObQOar0K+MV00K -Jqi2ba/v/zFtN31rH+wULfV8BPdtrVTbJMTrCJKLpAwKjsO7wFoFn0Qk7WNEmPmD -+TA65ilk0xfok/04pkh1gd/Kqzh1LIOpG0kmh410U3AJ2jsF3Sop+apH+r+Blota -SsCHnBqnABNRs6gs5FA1pbD4t81pQl5xoXtCCuZbPA== +dmVyMIICIjANBgkqhkiG9w0BAQEFAAOCAg8AMIICCgKCAgEA3ZeG1vL3Gvf4hQwE +oJhscgkLxCua6FsD8xPw13UvpjCTP3338qgewpXZtSltiCw2FOiRp5WHcb1izhGu +5+HCAa0Ns2u73w1OC8xiUwTf/Qtk2BhT9dxU32YtoGTroqU/jKZBQnyRtczFkdd8 +ijcYkuiMX++nOgA1MrexpkXLiknLSszO4VAEX3pKOYgTfGh6Q58WGusUhpR1xw8x +xxKhKbJPokP3iuqNxHhJYO7++LGUs6pXiJBHxS4kSDzb7gMBBCESEIPpP9kuu95H +QDZcn7M1121BgmFPZTjj2IABBYG+ZY0l2SQU1t7hZciDjSuSOBB4BC8vR7NKp6rb +VwsfHeSP3iP/mcfRjxd8fIJDIucEHj4gaanXttJEWA7z9nhnmXhU5ymKagrIVQCe +PtSoPen+EvfhmdcH87IEtZQO/vaq96k7yPl5yoQtod26slkvggUIpIAA21EYGU+B +M4V76u30ghRxUwOsWc1zRFTuw5Ea2G75KtOmyJdTr/b3GwjeCMzriSTMZZNg79wt +5lKMdx5nDnTvr77Vp2bdx2xWO2TwlevDm0n++2A9zuaJ+URULoZPgIp67AyC+jYo 
+lcwC1wS/nvH5ZKmQ80ZNZhqYVVxnET+p5+Fba3zfgZm5vKbt5ummLqkl+r0F2+xB +1tbQG75X28SJT/x6eRqFfM6iP7UCAwEAAaAAMA0GCSqGSIb3DQEBCwUAA4ICAQBc +TPShRdB7ZIL4xQNAxWLGoEbshbY/UpJQZdjojxn27roVwEhwP6B1/KgiVKV2X6bE +a36LUnaWYllIMAh4oOHJkIm2gZ3xitdEK1anCf5NJga7TnkwGfD4jTZA91fWCynt +a/64s0KQggKsUVY12TTJVQvOH/l9RMrXOq+jgIh4OURwCBtHTCS6oOp3eF04pEu7 +w54dMAsxp/N9AQaLh14IZUZAQ2v5kdipL99EEQH/G6llU08XrJ4LfGgwDkUjJSsA +TzrX2uk9MLHJVwjpZ99ktjNBs8Gyr4fGOmstT5TXEOO6bVhqZDC6kEL7vrmFSnDZ +g/9lrd4wLUT/STt+E4Qukedi0n/419IpkIAE5C1HOXRnQaOUdcnrLixUB21mBHt/ +n7gkwdY7Cu77dYvBIShzeCnxiJED0+XrBPD5yPokxEjE3MmiIK6meHHCuIwqLjX8 +I78ysv4COeH0svwSjvJInveS9QRCAWBpdvskxPJR8dpoysAff+jiyp3ZvkwcIiUO +Vbsusorpp8pFJXZxvPDBXCy5TOlFnIG/9itjPj98pFRIl5bxzNwDf4wrkwHEpR3i +jpM6y+/RWZn69BpTeAG0vZHhGk3SuXdU56cRzatys4X3biCImgDqeJMUcAoxlrIZ +vgbJVTorwqQmPz5kt28b8ddnUVobbZEPlRITcqjwFg== -----END CERTIFICATE REQUEST----- diff --git a/tests/integration/test_ssl_cert_authentication/certs/wrong-cert.pem b/tests/integration/test_ssl_cert_authentication/certs/wrong-cert.pem index 03ebf989764..b56a10f8a92 100644 --- a/tests/integration/test_ssl_cert_authentication/certs/wrong-cert.pem +++ b/tests/integration/test_ssl_cert_authentication/certs/wrong-cert.pem @@ -1,32 +1,32 @@ -----BEGIN CERTIFICATE----- -MIIFjTCCA3WgAwIBAgIUUghXstot43OmqoS1M2rmdSRRX54wDQYJKoZIhvcNAQEL +MIIFjTCCA3WgAwIBAgIUMcX2R8I2H8vNtASHi0EoufIgWEUwDQYJKoZIhvcNAQEL BQAwVjELMAkGA1UEBhMCUlUxEzARBgNVBAgMClNvbWUtU3RhdGUxITAfBgNVBAoM -GEludGVybmV0IFdpZGdpdHMgUHR5IEx0ZDEPMA0GA1UEAwwGY2xpZW50MB4XDTIy -MDgwODE3MDU1MloXDTMyMDgwNTE3MDU1MlowVjELMAkGA1UEBhMCUlUxEzARBgNV +GEludGVybmV0IFdpZGdpdHMgUHR5IEx0ZDEPMA0GA1UEAwwGY2xpZW50MB4XDTI0 +MDYyNjEwMjUwNVoXDTM0MDYyNDEwMjUwNVowVjELMAkGA1UEBhMCUlUxEzARBgNV BAgMClNvbWUtU3RhdGUxITAfBgNVBAoMGEludGVybmV0IFdpZGdpdHMgUHR5IEx0 ZDEPMA0GA1UEAwwGY2xpZW50MIICIjANBgkqhkiG9w0BAQEFAAOCAg8AMIICCgKC -AgEAjQjHTorL16xTiJFAaxeC0GDP0uCIbT1olT8NmWOePURqg7HpVzBtvffxiug8 -l8fF5kr3mfs9A2XuY9Of/A8uDTm+vdSEjdTwkAox4355oCHDQo3F0GsfJwy5f0vP -t1vKhMmvBHM7cN3aaPBnKJwM3Qd1XWxyEeCv2SDvaxhwMO9Mveal7WUGBKms35Rz -PYEVejM9ccy7nZ8+/wLAx2ixgs5I5uo/RkJkCMVGlKk6BPRLFpiG5tdvrUmFYNdp -yOV2iVEJ1McPKkViUfbVUII4UlvVrsgRNqsWn4ukD5jd7a7AzvfVQq6Mhe7SqDc2 -8c2aVLhoqxYpbKmcYYJGQAqlXgOObs6DEcyitXnK34RkltMgjrl1GyqqqS9hGkp4 -XBne+rw+mbH9jfBdQpi4Xp79l0NVQYahS2iX5HFYRBa4i8SCemMGpVpHrK4L+X8u -qINiLlVXIH7FBTBgz5EjvMsgihdBbEKlFLqUJsPJhRPyBmIewZMGZnsO8PR8av+P -jFp5iBFE1RtIcj4mg1QsjnYxA1QjUtPnqPeHqph7qxtIvjd/j+oXpcyal0xkGsh8 -G+sdYZXCktuocMDmm5ejJs5156znU2yHwN0/hdGJYGdKYWHjSVPT6sa9Q/blJ2IN -/CHSf7ao6FPq4XuMynN5N7K5RbQ22oYuCmqcGpQxtwWRcA0CAwEAAaNTMFEwHQYD -VR0OBBYEFFU4ee1zFXfU+/UiBHqDt7gbKn3aMB8GA1UdIwQYMBaAFFU4ee1zFXfU -+/UiBHqDt7gbKn3aMA8GA1UdEwEB/wQFMAMBAf8wDQYJKoZIhvcNAQELBQADggIB -AC3qz446kylKOhNkssTXnjmFrZJhZLD06ijE15rhIgHBP3FXk1RZYcWj0bvERZ2n -zz0IO+Hy68M1jBV0R5YE8SyNVwEec5QiTYhoFoGsBFvlf6zz8oBhSQKtrvpvefdO -8vu+BcvX5C/JcOPrG9y6feTEWa29O7007LM+GRJOe4i2x6xkHGyLWvCSAm9fvHRc -gs6BcRIJLULwqlRaIHHNt8R4pwVaanJ2qQ2Fctgbx9OYkQzQK6hOjo0UUMvykEhW -XW0DT3tYGb9keEG6lPHYcyOcbkans+zJmHxSqLiVPz7tm3pwDtd2OFCi869y60WS -zzQNBhXCYkh782dF+pnOxV6hmdPaLqx3tD/WrT72z1qNK4FEgVIzSy5ULCp3WcgZ -ARvMgCMV7CK+rDID2QHuwNmJIArTXX8JRrV/zkUgsdapGpF5zkQ8rNGRJcDKzBww -CiVPOOthSbf+F9jLQ5nzmlFa6rJ7RxHuj1PVwKhxc8u9bPHAgZSt3uGH4QaFh5bw -3aoGucu2yhnkv4aNK6w2nj04K9gYdLwRbJgTNqR1FRCEXzfaJF1szq0dhXQzlhf3 -kbP/Wh+RhDVqt2Eu90LbmbUl+DbIHiYdjc67XFNbppXNn8NpncZKSDH/xn4KvC7z -ORsznqTC1pyQp4SuvaKYFCOWBzMZ60G5T8CTehP1KmKR +AgEApABgxjCrAiDY529xEjboRE/PVwgVStD0dPE2vSOOqxfVYYOqP7VF7pG2PeA7 +Ek9Qqv2cqaDWbDWn5vT3AiN+aslfHtXJgGjPBrpOQ3Me6RSu2aUHPIguabUjz+kJ 
+VSkoj0pkbAqqIxKse1hZtUNHVwOmg/PthkpGPNxofNX1kCfBWoNJyuKFOoGeNgmY +6joY10zTaiHpq8hhA3b7WY35QdNGD7SwkYBvTjGGzBr/hu26fhX/DnceZ9kf9n6q +899gB2kZH5T1eTlShh12TI1sHa+BGz1YwR0HqM88zXDyAf7bWl7Hy5f8e9keZdx7 +Fx/ws93Bb3GusA5zwUm1iUe1OIwTLFlL+Kdkr0qzofDcQ0ZnNwrME4oAhCJFwSnJ +OWnrCmUou2XXj86Xl481uBana30zzJ9TniHM/hA54ttHsva/yB8tyoXcI4FASwk3 +GdihsOBbRS6KvmeKpEXQpsvBQ9GejSL/UUWuKg+O0ysHE9+QX/+OznFp66h/x7PP +Q7g6ipwAjgwuG5jm/Czz+dw4j3Qp5N5f7Dn3QhDzmXkKgzRzirKh9XVQqUFRwlLn +8VuzAhD5SjRN0JDE2jlt0Hin90zx/nkOV2b5hTYu9NVgmrfCye6uB/qsK7PQBh69 +9i4+8tBGXrcS2Nenm+Hm12fFhNum96A0ahj134H2ks4JcKcCAwEAAaNTMFEwHQYD +VR0OBBYEFIZYdI/00qzj+5JqEzEJfpj93AphMB8GA1UdIwQYMBaAFIZYdI/00qzj ++5JqEzEJfpj93AphMA8GA1UdEwEB/wQFMAMBAf8wDQYJKoZIhvcNAQELBQADggIB +AE+9y6Ea3pjIegOeffXzI725vXO0OSuv76vRoswUVQDKL7j7QcvqdcKCK/2hmeMW +MBNwKZneG5iumH2Yp5RQ14arihS9+SYEpgftvfNOZrzZ37RttJhSjBQ7GUorH7pD +uQ4s1unqKXv981JCtlKOQh5ZKthHAMsP/sfYuWg2ksVn1hvFTJZirewVUOgR8zPB +Djl6PdjZLDu1FlglQQ5YUNgsJbAqkBPcA0hEwGU0j5QEncvdspn4LiH2/mHhnzM9 +3QEvsXUxgJo989Am6+E77XNX0wcALj2zUmPPDWYgHgLKO/ZcSAOQ9JaauVhUre2D +7jPwnN47yLak5obVcyCpaDPKYk6sEUZkiWRaONvugoIbjYivmB/BJc0njfVA0kzT +FDwpUTtSddZgHzdTXe0p5C7IGmYkp/vgKlSpSYY6+aCiVApJSdJjL6FZKoOXqDnr +OgoQGSOJif4mDeipKOdrb2JtYwJkRl0c1S+tgOi8PU+ROvZxQGWI9/i20H58M7j0 +r/WhbudhcAqWglk5WOpCodbJhXffCrbUm5NjoFr7AKswxLJVz39WIe/duHPEGV7v +jLd/zj7eJRv5ycDyt91rbGxQ9NKzEx+by/5WIZTi+z+2PG75tdpQUwgEIh1c/XOt +6uXtS0sNnnjHVmXPBC+Myz+1NolYWjZMcBQ2xGIORvm8 -----END CERTIFICATE----- diff --git a/tests/integration/test_ssl_cert_authentication/certs/wrong-key.pem b/tests/integration/test_ssl_cert_authentication/certs/wrong-key.pem index 834f82fe80d..3924eac91c2 100644 --- a/tests/integration/test_ssl_cert_authentication/certs/wrong-key.pem +++ b/tests/integration/test_ssl_cert_authentication/certs/wrong-key.pem @@ -1,52 +1,52 @@ -----BEGIN PRIVATE KEY----- -MIIJQgIBADANBgkqhkiG9w0BAQEFAASCCSwwggkoAgEAAoICAQCNCMdOisvXrFOI -kUBrF4LQYM/S4IhtPWiVPw2ZY549RGqDselXMG299/GK6DyXx8XmSveZ+z0DZe5j -05/8Dy4NOb691ISN1PCQCjHjfnmgIcNCjcXQax8nDLl/S8+3W8qEya8Ecztw3dpo -8GconAzdB3VdbHIR4K/ZIO9rGHAw70y95qXtZQYEqazflHM9gRV6Mz1xzLudnz7/ -AsDHaLGCzkjm6j9GQmQIxUaUqToE9EsWmIbm12+tSYVg12nI5XaJUQnUxw8qRWJR -9tVQgjhSW9WuyBE2qxafi6QPmN3trsDO99VCroyF7tKoNzbxzZpUuGirFilsqZxh -gkZACqVeA45uzoMRzKK1ecrfhGSW0yCOuXUbKqqpL2EaSnhcGd76vD6Zsf2N8F1C -mLhenv2XQ1VBhqFLaJfkcVhEFriLxIJ6YwalWkesrgv5fy6og2IuVVcgfsUFMGDP -kSO8yyCKF0FsQqUUupQmw8mFE/IGYh7BkwZmew7w9Hxq/4+MWnmIEUTVG0hyPiaD -VCyOdjEDVCNS0+eo94eqmHurG0i+N3+P6helzJqXTGQayHwb6x1hlcKS26hwwOab -l6MmznXnrOdTbIfA3T+F0YlgZ0phYeNJU9Pqxr1D9uUnYg38IdJ/tqjoU+rhe4zK -c3k3srlFtDbahi4KapwalDG3BZFwDQIDAQABAoICABhnSHVittreyqV63LarHsuH -tO48Q98DHTGV83hzMFJRoEyhYErh6ltQp87tWG669pdeqXFPc8M+w/0qBSjMR5+q -OkMGntkb23RRD0jZ/ZmjvfGqLmxd3MoY/Y/1Qj/r9iL78vjGJ6bj/ILj9jWkquUk -AT7lIOfPBR6BAGx7kg4lmhXR7ywgquXrpioZuccOqvS2IQ+r6Vrgzzm49DqM9wo3 -P44VQlWZ701FNW1LyupIiyWhxtXyuTQ24IldPMSyT8x65USYHRunXdfo6ghAQBag -JV1U6QWYnRWGSCEaxmv2LQ6i+5fKNC6IoJLS2wMEHIxETHguIYTX5GNK83TpYquc -F2w0NcnO/xMPPFiNFwtX6N9/mexE7dDE+gDvUwWA0DodwHQFh9+IBphznr0OfrJ7 -G6I98tIJj5ecSsTnHyW6BzOxyoQ3KGXfwV8RnDoNtZXiK4boizGlrVrj3G5AzqAf -z4dnIox/zP3kPyhw6670a1PuesQ3aTvR63UqLFOwtm1moZCh5lJmxog32s4mikdn -cQiFFz83zd4hAMRsoJ7W5ABeZyM0V6DXd+lGO00bf3ex3cNFskLeSHLoArtiBt0n -xtWfXU7/h4URFCyD3rJPUruSPT8DPcvu+PpxB4MEnZhvpC+xOXp5LM+jHcSsIgXQ -Bp1wMpZtTN+tkJrtDGbRAoIBAQC/l9Ao+6I/NI1mO6AxMuwyFf4IVpSYH2j/wvrX -v7No+i8egiS5joi3boNFrwCFuNDYm4tu9DQmqOHOYFiJXCemRkeyglgn0tSnnfGH -jFQOlY4kX26lhI8A8EqCUiO3XYnEUsQOd2aJFDos1rr2UdIPFhmLIF7+6QNsFtsV -9r6JcNIl+tMlhAZ1LiPYaKsy3GWPptK0Ul3IykBocIf42CNBrbbhMjWeOHNVUzmQ 
-6EY5CEeGVEGOn8bkSq4mqtzDYVO9rFaX2ySsPtAwoelsxnxLRR+YFrVdIBS4zbJe -zUHMD7g0lDl+gLCTcwsi5hFqUIE0xldRjzkqZgyE5Y6sdaG3AoIBAQC8cfZtIxEr -4vVGl6ZqOsbqoXg1vNL99CQKK8KXBdi79ig4F0ItWgv/RfdZvLAQJ2fVDQcOQs7r -NYINYNJQ2+ASvwDpcjI//M/lCTFY/kNBbUdAYIIrMlNPVoAnfGXCSpxOmw6iTNeW -gvTHamvXR4hkI6XpxI60dmcBUxJExBsW9/Ua5oONZ37nYVL32/PZCba94nMpnJna -6sq+YYWewla0YOZBKgegL/9S4kSt23UaGEfP6tC55puVBEmJZ3ZF57eiNHRWjK5S -oLUSvoXghtNYD8kAE95DIMUfdZudnxcR7o0OZIXlXxdKuWELGj4rXLa52pTuXdUi -4SaoFBIBtqxbAoIBACHtKxImxOs2ThedVVeA8exIkqw9UKDDZx9rEmoj6wwayT84 -wnINOE5rBlnPjGMhf/HrUTx7h8SUwS9/Rg5EwEykQ0vr4tpibf+ka0GaasQOXZY5 -Q+mRWJVM95SuIN72dE/MbCU3pXN+nnKUL5UwlebP/gPx3oB7GH0Kaw8vmzvRO5U4 -Hi2VfGNBoV1P44jlGCLg4ozFzQ+iKNeVna5F77BQp4KnFnGHA31HeHkdeFnfCnNR -9i3gQyLnyoDUXrVGRcLfgpHvX0YU9V63PRNi7l2Rxu9yjfkIlkLHQd+hrxYIbpD4 -8llhRmNFyuIKJWlTCh4jRREPMIs+eNcLo0EMr28CggEAdbmER1325kHdFQK8VfTu -O7owDPIvpbMVqLz5zbs7UQFQ0unEz+xHPmyJQChenVjgxvpihAg8mDhPTOhFu3oB -G/TzLuPGfK8FeYIqr/v4LDGGp7Mi3rWL0i6p1QEj2SMfTwQTOlVjAYZCQSo8wLrR -fC3BAq9mr1qgsvUgnpEck1bIasvDN12GrCni2TExv08QOMP1SfOpMlgn3JO0FCA4 -Zmt6rQ8CwJH7od31Jd/J4kl043FAgEKOw6NwBUT7YGSIFe9LFp7AcKIxPMqFfGT+ -Ny91VHUVDz6zpxmj4+51TSCIQouOEmM7e9UfyP3WBOTgym7BR8Ca3DVoHuya6zMh -rQKCAQEAoy35YYbyHnb5CmVfCIBNk2ZrDERuT8NE6J7u2Y+1/yCmPJyLvA/JzYNm -LifFf9x8+WMKMVaANq5ZitBMr4gMhFQe2KflfSC9rEzfhAoWVqNGjCs79+xNEv3Q -eFv3ss/zU9AKbjepS6+wP6CuILqXVQ1lT/xMtJwc3+YQMyrIYHwUJ13IeGyCJRfY -1/81JcB5pm9sjrgvRccjNV739A8ACm78mlj5DRxasKfTXkIlx/oJUe/EXRGfuZwY -D2FxqP9LLa/gY9DzlUPYpWz3Y/KlbNO+nmakiAzHpPjwwca6FZug9xFW9Ua3SAwY -aH0VTefFJqNq7ADzlJxogNFALN+F0A== +MIIJQgIBADANBgkqhkiG9w0BAQEFAASCCSwwggkoAgEAAoICAQCkAGDGMKsCINjn +b3ESNuhET89XCBVK0PR08Ta9I46rF9Vhg6o/tUXukbY94DsST1Cq/ZypoNZsNafm +9PcCI35qyV8e1cmAaM8Guk5Dcx7pFK7ZpQc8iC5ptSPP6QlVKSiPSmRsCqojEqx7 +WFm1Q0dXA6aD8+2GSkY83Gh81fWQJ8Fag0nK4oU6gZ42CZjqOhjXTNNqIemryGED +dvtZjflB00YPtLCRgG9OMYbMGv+G7bp+Ff8Odx5n2R/2fqrz32AHaRkflPV5OVKG +HXZMjWwdr4EbPVjBHQeozzzNcPIB/ttaXsfLl/x72R5l3HsXH/Cz3cFvca6wDnPB +SbWJR7U4jBMsWUv4p2SvSrOh8NxDRmc3CswTigCEIkXBKck5aesKZSi7ZdePzpeX +jzW4FqdrfTPMn1OeIcz+EDni20ey9r/IHy3KhdwjgUBLCTcZ2KGw4FtFLoq+Z4qk +RdCmy8FD0Z6NIv9RRa4qD47TKwcT35Bf/47OcWnrqH/Hs89DuDqKnACODC4bmOb8 +LPP53DiPdCnk3l/sOfdCEPOZeQqDNHOKsqH1dVCpQVHCUufxW7MCEPlKNE3QkMTa +OW3QeKf3TPH+eQ5XZvmFNi701WCat8LJ7q4H+qwrs9AGHr32Lj7y0EZetxLY16eb +4ebXZ8WE26b3oDRqGPXfgfaSzglwpwIDAQABAoICACiTq1399qGtLN1+NjSyfx8/ +u+Ylqtb7AjDY6Zk8bfUpDXN2Fy5yFF5lkPiYPSVXmHbmDtftYoAdenBrVZ4i2All +z3IapSNvSyG4ANsxZYl3w5c3/KVecFVZKwYq+1MlvtJNLrGIpfXNjf1qq69seP8v +eQiW1sLuJ5ixU+znJz3GiFFzwFNBXoNORK3MDBiPzUufx4Mv5tfI2S/5RVEwDmbZ +9jC2nSUy6Nco69geKfDhas39dUDH+i7pir37MyLptqG+wCePPHkE1MU4Duf76a8i +mEf8ErSdESMUO0/9TPNvcihW4Qofjam624mKVq4vCegGyvBe6UDIIp3FNfREWLLC +ilit37ZVOHbq79qV+DobLADYXZGXrptzr6VqVZvvQUEwOjftD2B8GJzxbNxRl77F +BbeOhYA/IDr8YR7Qos0HjqDDekRavHKAa5kcf8yFVJaQycirHJZpjr3oNFktqg6H +9eb537SdPI5nHtgTSQSosxst+iBsjMCJ7rU7aowy9gKG75s9eME06hiQsukNcOI3 +hmBqQBeX+yLWh7Z6A2Y7MepHuYHWKereBGISR58bvTmNyI4mLWYwJZzjON2tot3a +MJM86gw83TuX1Qmp3+NjduQtdtMjDSXLN2yBbK4CufQYaTxK1xdHUoK/uvG9kIq3 +tP+/fiTHZHyuTSSgwOypAoIBAQDT2Vj2uahOypboLv33XtFr2kuDe1sogpLaAVu4 +Dv4vO835gto4fx79rK3u2fBxiVx0yv7kwitvpcwaoMdkUEoSBtOugYB9UezW+SX5 +91bpY0mBH0ToSmosPFVc6PEki6LRV+VGZ1gFXU7uZ4Wia9opENfT7d8cjBQ4NZ/M +sCyqHR2KYB82DHx6Lrtrs0eWn33N8BVgsy4xSbOi2YrgQCnJvfPWVYtcXjRbplj4 +jCVGnPlac0Z/bv3Kb7Q7EOS+d+RFi1ZpsFYPbW5KRhGshzOxGw5d/nCjkEXCV0ht +uK6KndjFOvCGfikZW7WVpw7bkCe0W2Ko/JSX99ccJBDyau1NAoIBAQDGLj/wVxss +dllwswBOAV3oSL9xbUPs+Xbr/FK4XKcL7wcY6cfdZBlsQLQCoAzyTZ8t+g756Hlt +a8qmW2/Wvdo+m63Z2ecnbI9pJsVyYeT8pVumx4biHuXbRBYO/0ZZPtB5kTT6Hzhv 
+ZHxoUj4jb7L/5kwEdEPFIZX4rVEXY20LJL5wtv2zEQylQk9kunjUgrP1L/GtcNh+ +QRzLXiJWAoC4HWcXmdxb/Hc0BU5geSwZL4bbl3YL3lwWvkd3aY17T90EjWy4uA6G +tsHCxbxauul1q8OkmCcLEfhYnDh95YoVddR97XhC33S0v4dYjX/iS46g8fJ0HhRo +9YGWsD+tRevDAoIBAFCp/5/iTV3C8fbyfa1FI0SH2Bz2SV2Bal0sCzpoKwzdHq6U +znaYoLpCl+/MeCsi/FtUN/3umQ9n9/FjqshdcfavNsbJdJ1DJoUsVPN65FL1hTVv +LJOuUgMJ7g70e21I5fQEHb7S9scEIlvQeye/HVBpo2SEvGFoTQKiGHid1EPp1ies +NfYkhvkW9jIqD2Yg0IwrkFhDoaEOySGG58Q/ainw8/l2lRvUmucSzenFoyPh/Wgd +YIiBQI1mPyAGbLLBf9+jEIIprHsvVcFeMLiaumoDPVM44LbG5mj7Rw7QNVV+iN2A +dbkgLJIFQ3z6IUQk/ZlE+qoRkprSuctzSCil4jkCggEAdiYWilNz6NL5yX193gNk +l9nfAGFS0JF8+31nV3AtSqkLAyhEtlE58ta0Oqhub3olPwTILucQlVJg80Kp700q +Mo8fWzRUYaWP7fFmXyXLnW97r3dei6o+ALWbrP81UnlnUkJmYgOA4q/2lz8Iupma +DoOeqD0kNf8q6KFzKc1lsfIK8ym1IC826cMZkAS3ioINhUw6+dq/xq1M3FVXhQ1i +7eDhmClrPQ/LhSDwtAUpbC5waLPodXTwU8LG2oL8DRr0ugUSXyGjz15fL54xB6pN +CpEHRzZKeIgTFci0ySGya87eiuCrBLsxWZyhtQJOznubIYp8sAtKwbQzuMGEhOmd +fwKCAQEAlZoi1SzHstg6PfpwrIHJV3imLa550k9hyAu610CKjMsg6IsFsgu9/J0b +9hkhlafeW+p9zhKSwjl3aCuWUMNE53R+zYmMIJJrBzejC+1H0SKW0Zix9+ghixOX +da1jRaUxUqApJYvvxUC8FbnATM/Eq0ofhGkG3o575SlO+54twJO+bXGAUf/C6xMY +AQUQh90pTbZ96Q3Wdm2Qmrhd/GUaC6k1vAHVHHU8WQHiLmo1fF4gL/TqRv5KEPUM +un6ld7h8BEWtMClhSIiL2h5nvSYGcB6Lai6rPO0UUbGkWBQFpGaeglUmoYi0ciC5 +lMRrRHGUiWHW9C4/siOKYrHBeH5oNQ== -----END PRIVATE KEY----- diff --git a/tests/integration/test_ssl_cert_authentication/configs/users_with_ssl_auth.xml b/tests/integration/test_ssl_cert_authentication/configs/users_with_ssl_auth.xml index c6ac737b632..4bd30163ea6 100644 --- a/tests/integration/test_ssl_cert_authentication/configs/users_with_ssl_auth.xml +++ b/tests/integration/test_ssl_cert_authentication/configs/users_with_ssl_auth.xml @@ -11,6 +11,12 @@ client3 + + + URI:spiffe://foo.com/bar + URI:spiffe://foo.com/baz + + diff --git a/tests/integration/test_ssl_cert_authentication/test.py b/tests/integration/test_ssl_cert_authentication/test.py index 22d41bb6e14..756a1e1996c 100644 --- a/tests/integration/test_ssl_cert_authentication/test.py +++ b/tests/integration/test_ssl_cert_authentication/test.py @@ -338,3 +338,39 @@ def test_create_user(): == 'emma\tssl_certificate\t{"common_names":["client2"]}\n' 'lucy\tssl_certificate\t{"common_names":["client2","client3"]}\n' ) + + +def test_x509_san_support(): + assert ( + execute_query_native( + instance, "SELECT currentUser()", user="jerome", cert_name="client4" + ) + == "jerome\n" + ) + assert ( + execute_query_https("SELECT currentUser()", user="jerome", cert_name="client4") + == "jerome\n" + ) + assert ( + instance.query( + "SELECT name, auth_type, auth_params FROM system.users WHERE name='jerome'" + ) + == 'jerome\tssl_certificate\t{"subject_alt_names":["URI:spiffe:\\\\/\\\\/foo.com\\\\/bar","URI:spiffe:\\\\/\\\\/foo.com\\\\/baz"]}\n' + ) + # user `jerome` is configured via xml config, but `show create` should work regardless. 
    assert (
        instance.query("SHOW CREATE USER jerome")
        == "CREATE USER jerome IDENTIFIED WITH ssl_certificate SAN \\'URI:spiffe://foo.com/bar\\', \\'URI:spiffe://foo.com/baz\\'\n"
    )

    instance.query(
        "CREATE USER jemma IDENTIFIED WITH ssl_certificate SAN 'URI:spiffe://foo.com/bar', 'URI:spiffe://foo.com/baz'"
    )
    assert (
        execute_query_https("SELECT currentUser()", user="jemma", cert_name="client4")
        == "jemma\n"
    )
    assert (
        instance.query("SHOW CREATE USER jemma")
        == "CREATE USER jemma IDENTIFIED WITH ssl_certificate SAN \\'URI:spiffe://foo.com/bar\\', \\'URI:spiffe://foo.com/baz\\'\n"
    )

From 1a952591821a9f6969a09875c5403e8ac38e7d2a Mon Sep 17 00:00:00 2001
From: Antonio Andelic
Date: Mon, 1 Jul 2024 15:17:41 +0200
Subject: [PATCH 071/140] Add extra profiling helpers for Keeper

---
 src/Common/ProfileEvents.cpp             |  7 ++
 src/Common/ZooKeeper/ZooKeeperCommon.cpp | 36 ++++++-----
 src/Common/ZooKeeper/ZooKeeperCommon.h   | 32 ++++-----
 src/Coordination/CoordinationSettings.h  |  5 +-
 src/Coordination/KeeperConstants.cpp     |  7 ++
 src/Coordination/KeeperDispatcher.cpp    | 17 ++++-
 src/Coordination/KeeperStateMachine.cpp  | 82 +++++++++++++++---------
 src/Coordination/KeeperStateMachine.h    |  7 +-
 src/Coordination/KeeperStorage.cpp       | 30 +++++++++
 src/Coordination/SnapshotableHashTable.h |  1 +
 src/Server/KeeperTCPHandler.cpp          | 19 ++++--
 11 files changed, 168 insertions(+), 75 deletions(-)

diff --git a/src/Common/ProfileEvents.cpp b/src/Common/ProfileEvents.cpp
index d98373b6c55..a1058a879bd 100644
--- a/src/Common/ProfileEvents.cpp
+++ b/src/Common/ProfileEvents.cpp
@@ -611,6 +611,13 @@ The server successfully detected this situation and will download merged part fr
     M(KeeperPacketsReceived, "Packets received by keeper server") \
     M(KeeperRequestTotal, "Total requests number on keeper server") \
     M(KeeperLatency, "Keeper latency") \
+    M(KeeperTotalElapsedMicroseconds, "Keeper total latency for a single request") \
+    M(KeeperProcessElapsedMicroseconds, "Keeper commit latency for a single request") \
+    M(KeeperPreprocessElapsedMicroseconds, "Keeper preprocessing latency for a single request") \
+    M(KeeperStorageLockWaitMicroseconds, "Time spent waiting for acquiring Keeper storage lock") \
+    M(KeeperCommitWaitElapsedMicroseconds, "Time spent waiting for certain log to be committed") \
+    M(KeeperBatchMaxCount, "Number of times the size of batch was limited by the amount") \
+    M(KeeperBatchMaxTotalSize, "Number of times the size of batch was limited by the total bytes size") \
     M(KeeperCommits, "Number of successful commits") \
     M(KeeperCommitsFailed, "Number of failed commits") \
     M(KeeperSnapshotCreations, "Number of snapshots creations")\
diff --git a/src/Common/ZooKeeper/ZooKeeperCommon.cpp b/src/Common/ZooKeeper/ZooKeeperCommon.cpp
index 48bb510e589..dff14f74681 100644
--- a/src/Common/ZooKeeper/ZooKeeperCommon.cpp
+++ b/src/Common/ZooKeeper/ZooKeeperCommon.cpp
@@ -9,7 +9,6 @@
 #include
 #include
 #include
-#include

 namespace Coordination
 {
@@ -29,7 +28,7 @@ void ZooKeeperResponse::write(WriteBuffer & out) const
     Coordination::write(buf.str(), out);
 }

-std::string ZooKeeperRequest::toString() const
+std::string ZooKeeperRequest::toString(bool short_format) const
 {
     return fmt::format(
         "XID = {}\n"
@@ -37,7 +36,7 @@ std::string ZooKeeperRequest::toString() const
         "Additional info:\n{}",
         xid,
         getOpNum(),
-        toStringImpl());
+        toStringImpl(short_format));
 }

 void ZooKeeperRequest::write(WriteBuffer & out) const
@@ -60,7 +59,7 @@ void ZooKeeperSyncRequest::readImpl(ReadBuffer & in)
     Coordination::read(path,
in); } -std::string ZooKeeperSyncRequest::toStringImpl() const +std::string ZooKeeperSyncRequest::toStringImpl(bool /*short_format*/) const { return fmt::format("path = {}", path); } @@ -91,7 +90,7 @@ void ZooKeeperReconfigRequest::readImpl(ReadBuffer & in) Coordination::read(version, in); } -std::string ZooKeeperReconfigRequest::toStringImpl() const +std::string ZooKeeperReconfigRequest::toStringImpl(bool /*short_format*/) const { return fmt::format( "joining = {}\nleaving = {}\nnew_members = {}\nversion = {}", @@ -145,7 +144,7 @@ void ZooKeeperAuthRequest::readImpl(ReadBuffer & in) Coordination::read(data, in); } -std::string ZooKeeperAuthRequest::toStringImpl() const +std::string ZooKeeperAuthRequest::toStringImpl(bool /*short_format*/) const { return fmt::format( "type = {}\n" @@ -191,7 +190,7 @@ void ZooKeeperCreateRequest::readImpl(ReadBuffer & in) is_sequential = true; } -std::string ZooKeeperCreateRequest::toStringImpl() const +std::string ZooKeeperCreateRequest::toStringImpl(bool /*short_format*/) const { return fmt::format( "path = {}\n" @@ -218,7 +217,7 @@ void ZooKeeperRemoveRequest::writeImpl(WriteBuffer & out) const Coordination::write(version, out); } -std::string ZooKeeperRemoveRequest::toStringImpl() const +std::string ZooKeeperRemoveRequest::toStringImpl(bool /*short_format*/) const { return fmt::format( "path = {}\n" @@ -245,7 +244,7 @@ void ZooKeeperExistsRequest::readImpl(ReadBuffer & in) Coordination::read(has_watch, in); } -std::string ZooKeeperExistsRequest::toStringImpl() const +std::string ZooKeeperExistsRequest::toStringImpl(bool /*short_format*/) const { return fmt::format("path = {}", path); } @@ -272,7 +271,7 @@ void ZooKeeperGetRequest::readImpl(ReadBuffer & in) Coordination::read(has_watch, in); } -std::string ZooKeeperGetRequest::toStringImpl() const +std::string ZooKeeperGetRequest::toStringImpl(bool /*short_format*/) const { return fmt::format("path = {}", path); } @@ -303,7 +302,7 @@ void ZooKeeperSetRequest::readImpl(ReadBuffer & in) Coordination::read(version, in); } -std::string ZooKeeperSetRequest::toStringImpl() const +std::string ZooKeeperSetRequest::toStringImpl(bool /*short_format*/) const { return fmt::format( "path = {}\n" @@ -334,7 +333,7 @@ void ZooKeeperListRequest::readImpl(ReadBuffer & in) Coordination::read(has_watch, in); } -std::string ZooKeeperListRequest::toStringImpl() const +std::string ZooKeeperListRequest::toStringImpl(bool /*short_format*/) const { return fmt::format("path = {}", path); } @@ -356,7 +355,7 @@ void ZooKeeperFilteredListRequest::readImpl(ReadBuffer & in) list_request_type = static_cast(read_request_type); } -std::string ZooKeeperFilteredListRequest::toStringImpl() const +std::string ZooKeeperFilteredListRequest::toStringImpl(bool /*short_format*/) const { return fmt::format( "path = {}\n" @@ -401,7 +400,7 @@ void ZooKeeperSetACLRequest::readImpl(ReadBuffer & in) Coordination::read(version, in); } -std::string ZooKeeperSetACLRequest::toStringImpl() const +std::string ZooKeeperSetACLRequest::toStringImpl(bool /*short_format*/) const { return fmt::format("path = {}\nversion = {}", path, version); } @@ -426,7 +425,7 @@ void ZooKeeperGetACLRequest::writeImpl(WriteBuffer & out) const Coordination::write(path, out); } -std::string ZooKeeperGetACLRequest::toStringImpl() const +std::string ZooKeeperGetACLRequest::toStringImpl(bool /*short_format*/) const { return fmt::format("path = {}", path); } @@ -455,7 +454,7 @@ void ZooKeeperCheckRequest::readImpl(ReadBuffer & in) Coordination::read(version, in); } -std::string 
ZooKeeperCheckRequest::toStringImpl() const +std::string ZooKeeperCheckRequest::toStringImpl(bool /*short_format*/) const { return fmt::format("path = {}\nversion = {}", path, version); } @@ -600,8 +599,11 @@ void ZooKeeperMultiRequest::readImpl(ReadBuffer & in) } } -std::string ZooKeeperMultiRequest::toStringImpl() const +std::string ZooKeeperMultiRequest::toStringImpl(bool short_format) const { + if (short_format) + return fmt::format("Subrequests size = {}", requests.size()); + auto out = fmt::memory_buffer(); for (const auto & request : requests) { diff --git a/src/Common/ZooKeeper/ZooKeeperCommon.h b/src/Common/ZooKeeper/ZooKeeperCommon.h index 490c2dce4f8..fd6ec3cd375 100644 --- a/src/Common/ZooKeeper/ZooKeeperCommon.h +++ b/src/Common/ZooKeeper/ZooKeeperCommon.h @@ -63,12 +63,12 @@ struct ZooKeeperRequest : virtual Request /// Writes length, xid, op_num, then the rest. void write(WriteBuffer & out) const; - std::string toString() const; + std::string toString(bool short_format = false) const; virtual void writeImpl(WriteBuffer &) const = 0; virtual void readImpl(ReadBuffer &) = 0; - virtual std::string toStringImpl() const { return ""; } + virtual std::string toStringImpl(bool /*short_format*/) const { return ""; } static std::shared_ptr read(ReadBuffer & in); @@ -98,7 +98,7 @@ struct ZooKeeperSyncRequest final : ZooKeeperRequest OpNum getOpNum() const override { return OpNum::Sync; } void writeImpl(WriteBuffer & out) const override; void readImpl(ReadBuffer & in) override; - std::string toStringImpl() const override; + std::string toStringImpl(bool short_format) const override; ZooKeeperResponsePtr makeResponse() const override; bool isReadRequest() const override { return false; } @@ -123,7 +123,7 @@ struct ZooKeeperReconfigRequest final : ZooKeeperRequest OpNum getOpNum() const override { return OpNum::Reconfig; } void writeImpl(WriteBuffer & out) const override; void readImpl(ReadBuffer & in) override; - std::string toStringImpl() const override; + std::string toStringImpl(bool short_format) const override; ZooKeeperResponsePtr makeResponse() const override; bool isReadRequest() const override { return false; } @@ -176,7 +176,7 @@ struct ZooKeeperAuthRequest final : ZooKeeperRequest OpNum getOpNum() const override { return OpNum::Auth; } void writeImpl(WriteBuffer & out) const override; void readImpl(ReadBuffer & in) override; - std::string toStringImpl() const override; + std::string toStringImpl(bool short_format) const override; ZooKeeperResponsePtr makeResponse() const override; bool isReadRequest() const override { return false; } @@ -229,7 +229,7 @@ struct ZooKeeperCreateRequest final : public CreateRequest, ZooKeeperRequest OpNum getOpNum() const override { return not_exists ? 
OpNum::CreateIfNotExists : OpNum::Create; } void writeImpl(WriteBuffer & out) const override; void readImpl(ReadBuffer & in) override; - std::string toStringImpl() const override; + std::string toStringImpl(bool short_format) const override; ZooKeeperResponsePtr makeResponse() const override; bool isReadRequest() const override { return false; } @@ -266,7 +266,7 @@ struct ZooKeeperRemoveRequest final : RemoveRequest, ZooKeeperRequest OpNum getOpNum() const override { return OpNum::Remove; } void writeImpl(WriteBuffer & out) const override; void readImpl(ReadBuffer & in) override; - std::string toStringImpl() const override; + std::string toStringImpl(bool short_format) const override; ZooKeeperResponsePtr makeResponse() const override; bool isReadRequest() const override { return false; } @@ -293,7 +293,7 @@ struct ZooKeeperExistsRequest final : ExistsRequest, ZooKeeperRequest OpNum getOpNum() const override { return OpNum::Exists; } void writeImpl(WriteBuffer & out) const override; void readImpl(ReadBuffer & in) override; - std::string toStringImpl() const override; + std::string toStringImpl(bool short_format) const override; ZooKeeperResponsePtr makeResponse() const override; bool isReadRequest() const override { return true; } @@ -320,7 +320,7 @@ struct ZooKeeperGetRequest final : GetRequest, ZooKeeperRequest OpNum getOpNum() const override { return OpNum::Get; } void writeImpl(WriteBuffer & out) const override; void readImpl(ReadBuffer & in) override; - std::string toStringImpl() const override; + std::string toStringImpl(bool short_format) const override; ZooKeeperResponsePtr makeResponse() const override; bool isReadRequest() const override { return true; } @@ -347,7 +347,7 @@ struct ZooKeeperSetRequest final : SetRequest, ZooKeeperRequest OpNum getOpNum() const override { return OpNum::Set; } void writeImpl(WriteBuffer & out) const override; void readImpl(ReadBuffer & in) override; - std::string toStringImpl() const override; + std::string toStringImpl(bool short_format) const override; ZooKeeperResponsePtr makeResponse() const override; bool isReadRequest() const override { return false; } @@ -375,7 +375,7 @@ struct ZooKeeperListRequest : ListRequest, ZooKeeperRequest OpNum getOpNum() const override { return OpNum::List; } void writeImpl(WriteBuffer & out) const override; void readImpl(ReadBuffer & in) override; - std::string toStringImpl() const override; + std::string toStringImpl(bool short_format) const override; ZooKeeperResponsePtr makeResponse() const override; bool isReadRequest() const override { return true; } @@ -395,7 +395,7 @@ struct ZooKeeperFilteredListRequest final : ZooKeeperListRequest OpNum getOpNum() const override { return OpNum::FilteredList; } void writeImpl(WriteBuffer & out) const override; void readImpl(ReadBuffer & in) override; - std::string toStringImpl() const override; + std::string toStringImpl(bool short_format) const override; size_t bytesSize() const override { return ZooKeeperListRequest::bytesSize() + sizeof(list_request_type); } }; @@ -428,7 +428,7 @@ struct ZooKeeperCheckRequest : CheckRequest, ZooKeeperRequest OpNum getOpNum() const override { return not_exists ? 
OpNum::CheckNotExists : OpNum::Check; }
     void writeImpl(WriteBuffer & out) const override;
     void readImpl(ReadBuffer & in) override;
-    std::string toStringImpl() const override;
+    std::string toStringImpl(bool short_format) const override;

     ZooKeeperResponsePtr makeResponse() const override;
     bool isReadRequest() const override { return true; }
@@ -469,7 +469,7 @@ struct ZooKeeperSetACLRequest final : SetACLRequest, ZooKeeperRequest
     OpNum getOpNum() const override { return OpNum::SetACL; }
     void writeImpl(WriteBuffer & out) const override;
     void readImpl(ReadBuffer & in) override;
-    std::string toStringImpl() const override;
+    std::string toStringImpl(bool short_format) const override;

     ZooKeeperResponsePtr makeResponse() const override;
     bool isReadRequest() const override { return false; }
@@ -490,7 +490,7 @@ struct ZooKeeperGetACLRequest final : GetACLRequest, ZooKeeperRequest
     OpNum getOpNum() const override { return OpNum::GetACL; }
     void writeImpl(WriteBuffer & out) const override;
     void readImpl(ReadBuffer & in) override;
-    std::string toStringImpl() const override;
+    std::string toStringImpl(bool short_format) const override;

     ZooKeeperResponsePtr makeResponse() const override;
     bool isReadRequest() const override { return true; }
@@ -516,7 +516,7 @@ struct ZooKeeperMultiRequest final : MultiRequest, ZooKeeperRequest

     void writeImpl(WriteBuffer & out) const override;
     void readImpl(ReadBuffer & in) override;
-    std::string toStringImpl() const override;
+    std::string toStringImpl(bool short_format) const override;

     ZooKeeperResponsePtr makeResponse() const override;
     bool isReadRequest() const override;
diff --git a/src/Coordination/CoordinationSettings.h b/src/Coordination/CoordinationSettings.h
index a32552616ee..e7ae1f86d2e 100644
--- a/src/Coordination/CoordinationSettings.h
+++ b/src/Coordination/CoordinationSettings.h
@@ -58,7 +58,10 @@ struct Settings;
     M(UInt64, latest_logs_cache_size_threshold, 1 * 1024 * 1024 * 1024, "Maximum total size of in-memory cache of latest log entries.", 0) \
     M(UInt64, commit_logs_cache_size_threshold, 500 * 1024 * 1024, "Maximum total size of in-memory cache of log entries needed next for commit.", 0) \
     M(UInt64, disk_move_retries_wait_ms, 1000, "How long to wait between retries after a failure which happened while a file was being moved between disks.", 0) \
-    M(UInt64, disk_move_retries_during_init, 100, "The amount of retries after a failure which happened while a file was being moved between disks during initialization.", 0)
+    M(UInt64, disk_move_retries_during_init, 100, "The amount of retries after a failure which happened while a file was being moved between disks during initialization.", 0) \
+    M(UInt64, log_slow_total_threshold_ms, 5000, "Requests for which the total latency is larger than this setting will be logged", 0) \
+    M(UInt64, log_slow_cpu_threshold_ms, 5000, "Requests for which the CPU (preprocessing and processing) latency is larger than this setting will be logged", 0) \
+    M(UInt64, log_slow_connection_operation_threshold_ms, 1000, "Log a message if a certain operation took too long inside a single connection", 0)

 DECLARE_SETTINGS_TRAITS(CoordinationSettingsTraits, LIST_OF_COORDINATION_SETTINGS)
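The new log_slow_* thresholds are ordinary coordination settings, so they can also be tuned per server in the Keeper section of the configuration file. A hypothetical override (the threshold values here are chosen purely for illustration):

    <clickhouse>
        <keeper_server>
            <coordination_settings>
                <!-- log requests whose end-to-end latency exceeds 1 second -->
                <log_slow_total_threshold_ms>1000</log_slow_total_threshold_ms>
                <!-- log requests that spend more than 1 second in preprocessing/processing -->
                <log_slow_cpu_threshold_ms>1000</log_slow_cpu_threshold_ms>
                <!-- log single-connection operations slower than half a second -->
                <log_slow_connection_operation_threshold_ms>500</log_slow_connection_operation_threshold_ms>
            </coordination_settings>
        </keeper_server>
    </clickhouse>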
diff --git a/src/Coordination/KeeperConstants.cpp b/src/Coordination/KeeperConstants.cpp
index 51bf037c1c9..ff26b3171ea 100644
--- a/src/Coordination/KeeperConstants.cpp
+++ b/src/Coordination/KeeperConstants.cpp
@@ -238,6 +238,13 @@
     M(KeeperPacketsReceived) \
     M(KeeperRequestTotal) \
     M(KeeperLatency) \
+    M(KeeperTotalElapsedMicroseconds) \
+    M(KeeperProcessElapsedMicroseconds) \
+    M(KeeperPreprocessElapsedMicroseconds) \
+    M(KeeperStorageLockWaitMicroseconds) \
+    M(KeeperCommitWaitElapsedMicroseconds) \
+    M(KeeperBatchMaxCount) \
+    M(KeeperBatchMaxTotalSize) \
     M(KeeperCommits) \
     M(KeeperCommitsFailed) \
     M(KeeperSnapshotCreations) \
diff --git a/src/Coordination/KeeperDispatcher.cpp b/src/Coordination/KeeperDispatcher.cpp
index b4389da082d..925ac9a4efe 100644
--- a/src/Coordination/KeeperDispatcher.cpp
+++ b/src/Coordination/KeeperDispatcher.cpp
@@ -31,6 +31,13 @@ namespace CurrentMetrics
     extern const Metric KeeperOutstandingRequets;
 }

+namespace ProfileEvents
+{
+    extern const Event KeeperCommitWaitElapsedMicroseconds;
+    extern const Event KeeperBatchMaxCount;
+    extern const Event KeeperBatchMaxTotalSize;
+}
+
 using namespace std::chrono_literals;

 namespace DB
@@ -119,6 +126,7 @@ void KeeperDispatcher::requestThread()
         auto coordination_settings = configuration_and_settings->coordination_settings;
         uint64_t max_wait = coordination_settings->operation_timeout_ms.totalMilliseconds();
         uint64_t max_batch_bytes_size = coordination_settings->max_requests_batch_bytes_size;
+        size_t max_batch_size = coordination_settings->max_requests_batch_size;

         /// The code below do a very simple thing: batch all write (quorum) requests into vector until
         /// previous write batch is not finished or max_batch size achieved. The main complexity goes from
@@ -188,7 +196,6 @@ void KeeperDispatcher::requestThread()
                     return false;
                 };

-                size_t max_batch_size = coordination_settings->max_requests_batch_size;
                 while (!shutdown_called && current_batch.size() < max_batch_size && !has_reconfig_request
                        && current_batch_bytes_size < max_batch_bytes_size && try_get_request())
                     ;
@@ -225,6 +232,12 @@ void KeeperDispatcher::requestThread()
                 /// Process collected write requests batch
                 if (!current_batch.empty())
                 {
+                    if (current_batch.size() == max_batch_size)
+                        ProfileEvents::increment(ProfileEvents::KeeperBatchMaxCount, 1);
+
+                    if (current_batch_bytes_size == max_batch_bytes_size)
+                        ProfileEvents::increment(ProfileEvents::KeeperBatchMaxTotalSize, 1);
+
                     LOG_TRACE(log, "Processing requests batch, size: {}, bytes: {}", current_batch.size(), current_batch_bytes_size);

                     auto result = server->putRequestBatch(current_batch);
@@ -243,6 +256,8 @@ void KeeperDispatcher::requestThread()
                 /// If we will execute read or reconfig next, we have to process result now
                 if (execute_requests_after_write)
                 {
+                    Stopwatch watch;
+                    SCOPE_EXIT(ProfileEvents::increment(ProfileEvents::KeeperCommitWaitElapsedMicroseconds, watch.elapsedMicroseconds()));
                     if (prev_result)
                         result_buf = forceWaitAndProcessResult(
                             prev_result, prev_batch, /*clear_requests_on_success=*/!execute_requests_after_write);
diff --git a/src/Coordination/KeeperStateMachine.cpp b/src/Coordination/KeeperStateMachine.cpp
index e4d661dfe17..a12d8a50ac3 100644
--- a/src/Coordination/KeeperStateMachine.cpp
+++ b/src/Coordination/KeeperStateMachine.cpp
@@ -1,12 +1,14 @@
 #include
 #include
+#include
+#include
+#include
 #include
 #include
-#include
 #include
-#include
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -17,7 +19,6 @@
 #include
 #include
 #include
-#include


 namespace ProfileEvents
@@ -31,6 +32,7 @@ namespace ProfileEvents
     extern const Event KeeperSnapshotApplysFailed;
     extern const Event KeeperReadSnapshot;
     extern const Event KeeperSaveSnapshot;
+    extern const Event KeeperStorageLockWaitMicroseconds;
 }

 namespace DB
@@ -153,6 +155,14 @@ void assertDigest(
 }


+std::unique_lock KeeperStateMachine::getStorageLock() const
+{
+    Stopwatch watch;
+    std::unique_lock lock(storage_and_responses_lock);
+    ProfileEvents::increment(ProfileEvents::KeeperStorageLockWaitMicroseconds, watch.elapsedMicroseconds());
+    return lock;
+}
+
 nuraft::ptr KeeperStateMachine::pre_commit(uint64_t log_idx, nuraft::buffer & data)
 {
     auto result = nuraft::buffer::alloc(sizeof(log_idx));
@@ -272,7 +282,7 @@ bool KeeperStateMachine::preprocess(const KeeperStorage::RequestForSession & req
     if (op_num == Coordination::OpNum::SessionID || op_num == Coordination::OpNum::Reconfig)
         return true;

-    std::lock_guard lock(storage_and_responses_lock);
+    auto lock = getStorageLock();

     if (storage->isFinalized())
         return false;
@@ -302,7 +312,7 @@ bool KeeperStateMachine::preprocess(const KeeperStorage::RequestForSession & req

 void KeeperStateMachine::reconfigure(const KeeperStorage::RequestForSession& request_for_session)
 {
-    std::lock_guard _(storage_and_responses_lock);
+    auto lock = getStorageLock();
     KeeperStorage::ResponseForSession response = processReconfiguration(request_for_session);
     if (!responses_queue.push(response))
     {
@@ -391,7 +401,7 @@ nuraft::ptr KeeperStateMachine::commit(const uint64_t log_idx, n
     if (!keeper_context->localLogsPreprocessed() && !preprocess(*request_for_session))
         return nullptr;

-    auto try_push = [this](const KeeperStorage::ResponseForSession& response)
+    auto try_push = [&](const KeeperStorage::ResponseForSession& response)
     {
         if (!responses_queue.push(response))
         {
@@ -400,6 +410,17 @@ nuraft::ptr KeeperStateMachine::commit(const uint64_t log_idx, n
                 "Failed to push response with session id {} to the queue, probably because of shutdown",
                 response.session_id);
         }
+
+        using namespace std::chrono;
+        uint64_t elapsed = duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count() - request_for_session->time;
+        if (elapsed > keeper_context->getCoordinationSettings()->log_slow_total_threshold_ms)
+        {
+            LOG_INFO(
+                log,
+                "Total time to process a request took too long ({}ms).\nRequest info: {}",
+                elapsed,
+                request_for_session->request->toString(/*short_format=*/true));
+        }
     };

     try
@@ -417,7 +438,7 @@ nuraft::ptr KeeperStateMachine::commit(const uint64_t log_idx, n
             response_for_session.session_id = -1;
             response_for_session.response = response;

-            std::lock_guard lock(storage_and_responses_lock);
+            auto lock = getStorageLock();
             session_id = storage->getSessionID(session_id_request.session_timeout_ms);
             LOG_DEBUG(log, "Session ID response {} with timeout {}", session_id, session_id_request.session_timeout_ms);
             response->session_id = session_id;
@@ -426,12 +447,13 @@ nuraft::ptr KeeperStateMachine::commit(const uint64_t log_idx, n
         else
         {
             if (op_num == Coordination::OpNum::Close)
+            {
                 std::lock_guard lock(request_cache_mutex);
                 parsed_request_cache.erase(request_for_session->session_id);
             }

-            std::lock_guard lock(storage_and_responses_lock);
+            auto lock = getStorageLock();
             KeeperStorage::ResponsesForSessions responses_for_sessions
                 = storage->processRequest(request_for_session->request, request_for_session->session_id, request_for_session->zxid);
             for (auto & response_for_session : responses_for_sessions)
@@ -482,7 +504,7 @@ bool KeeperStateMachine::apply_snapshot(nuraft::snapshot & s)
     }

     { /// deserialize and apply snapshot to storage
-        std::lock_guard lock(storage_and_responses_lock);
+        auto lock = getStorageLock();

         SnapshotDeserializationResult snapshot_deserialization_result;
         if (latest_snapshot_ptr)
@@ -534,7 +556,7 @@ void KeeperStateMachine::rollbackRequest(const KeeperStorage::RequestForSession
     if (request_for_session.request->getOpNum() == 
Coordination::OpNum::SessionID) return; - std::lock_guard lock(storage_and_responses_lock); + auto lock = getStorageLock(); storage->rollbackRequest(request_for_session.zxid, allow_missing); } @@ -561,7 +583,7 @@ void KeeperStateMachine::create_snapshot(nuraft::snapshot & s, nuraft::async_res auto snapshot_meta_copy = nuraft::snapshot::deserialize(*snp_buf); CreateSnapshotTask snapshot_task; { /// lock storage for a short period time to turn on "snapshot mode". After that we can read consistent storage state without locking. - std::lock_guard lock(storage_and_responses_lock); + auto lock = getStorageLock(); snapshot_task.snapshot = std::make_shared(storage.get(), snapshot_meta_copy, getClusterConfig()); } @@ -623,7 +645,7 @@ void KeeperStateMachine::create_snapshot(nuraft::snapshot & s, nuraft::async_res } { /// Destroy snapshot with lock - std::lock_guard lock(storage_and_responses_lock); + auto lock = getStorageLock(); LOG_TRACE(log, "Clearing garbage after snapshot"); /// Turn off "snapshot mode" and clear outdate part of storage state storage->clearGarbageAfterSnapshot(); @@ -764,7 +786,7 @@ int KeeperStateMachine::read_logical_snp_obj( void KeeperStateMachine::processReadRequest(const KeeperStorage::RequestForSession & request_for_session) { /// Pure local request, just process it with storage - std::lock_guard lock(storage_and_responses_lock); + auto lock = getStorageLock(); auto responses = storage->processRequest( request_for_session.request, request_for_session.session_id, std::nullopt, true /*check_acl*/, true /*is_local*/); for (const auto & response : responses) @@ -774,97 +796,97 @@ void KeeperStateMachine::processReadRequest(const KeeperStorage::RequestForSessi void KeeperStateMachine::shutdownStorage() { - std::lock_guard lock(storage_and_responses_lock); + auto lock = getStorageLock(); storage->finalize(); } std::vector KeeperStateMachine::getDeadSessions() { - std::lock_guard lock(storage_and_responses_lock); + auto lock = getStorageLock(); return storage->getDeadSessions(); } int64_t KeeperStateMachine::getNextZxid() const { - std::lock_guard lock(storage_and_responses_lock); + auto lock = getStorageLock(); return storage->getNextZXID(); } KeeperStorage::Digest KeeperStateMachine::getNodesDigest() const { - std::lock_guard lock(storage_and_responses_lock); + auto lock = getStorageLock(); return storage->getNodesDigest(false); } uint64_t KeeperStateMachine::getLastProcessedZxid() const { - std::lock_guard lock(storage_and_responses_lock); + auto lock = getStorageLock(); return storage->getZXID(); } uint64_t KeeperStateMachine::getNodesCount() const { - std::lock_guard lock(storage_and_responses_lock); + auto lock = getStorageLock(); return storage->getNodesCount(); } uint64_t KeeperStateMachine::getTotalWatchesCount() const { - std::lock_guard lock(storage_and_responses_lock); + auto lock = getStorageLock(); return storage->getTotalWatchesCount(); } uint64_t KeeperStateMachine::getWatchedPathsCount() const { - std::lock_guard lock(storage_and_responses_lock); + auto lock = getStorageLock(); return storage->getWatchedPathsCount(); } uint64_t KeeperStateMachine::getSessionsWithWatchesCount() const { - std::lock_guard lock(storage_and_responses_lock); + auto lock = getStorageLock(); return storage->getSessionsWithWatchesCount(); } uint64_t KeeperStateMachine::getTotalEphemeralNodesCount() const { - std::lock_guard lock(storage_and_responses_lock); + auto lock = getStorageLock(); return storage->getTotalEphemeralNodesCount(); } uint64_t 
KeeperStateMachine::getSessionWithEphemeralNodesCount() const { - std::lock_guard lock(storage_and_responses_lock); + auto lock = getStorageLock(); return storage->getSessionWithEphemeralNodesCount(); } void KeeperStateMachine::dumpWatches(WriteBufferFromOwnString & buf) const { - std::lock_guard lock(storage_and_responses_lock); + auto lock = getStorageLock(); storage->dumpWatches(buf); } void KeeperStateMachine::dumpWatchesByPath(WriteBufferFromOwnString & buf) const { - std::lock_guard lock(storage_and_responses_lock); + auto lock = getStorageLock(); storage->dumpWatchesByPath(buf); } void KeeperStateMachine::dumpSessionsAndEphemerals(WriteBufferFromOwnString & buf) const { - std::lock_guard lock(storage_and_responses_lock); + auto lock = getStorageLock(); storage->dumpSessionsAndEphemerals(buf); } uint64_t KeeperStateMachine::getApproximateDataSize() const { - std::lock_guard lock(storage_and_responses_lock); + auto lock = getStorageLock(); return storage->getApproximateDataSize(); } uint64_t KeeperStateMachine::getKeyArenaSize() const { - std::lock_guard lock(storage_and_responses_lock); + auto lock = getStorageLock(); return storage->getArenaDataSize(); } @@ -905,7 +927,7 @@ ClusterConfigPtr KeeperStateMachine::getClusterConfig() const void KeeperStateMachine::recalculateStorageStats() { - std::lock_guard lock(storage_and_responses_lock); + auto lock = getStorageLock(); LOG_INFO(log, "Recalculating storage stats"); storage->recalculateStats(); LOG_INFO(log, "Done recalculating storage stats"); diff --git a/src/Coordination/KeeperStateMachine.h b/src/Coordination/KeeperStateMachine.h index ee6109f0a17..5b166e11569 100644 --- a/src/Coordination/KeeperStateMachine.h +++ b/src/Coordination/KeeperStateMachine.h @@ -131,6 +131,8 @@ public: void reconfigure(const KeeperStorage::RequestForSession& request_for_session); private: + std::unique_lock getStorageLock() const; + CommitCallback commit_callback; /// In our state machine we always have a single snapshot which is stored /// in memory in compressed (serialized) format. @@ -139,7 +141,7 @@ private: nuraft::ptr latest_snapshot_buf = nullptr; /// Main state machine logic - KeeperStoragePtr storage TSA_PT_GUARDED_BY(storage_and_responses_lock); + KeeperStoragePtr storage; /// Save/Load and Serialize/Deserialize logic for snapshots. 
KeeperSnapshotManager snapshot_manager; @@ -183,7 +185,6 @@ private: KeeperSnapshotManagerS3 * snapshot_manager_s3; KeeperStorage::ResponseForSession processReconfiguration( - const KeeperStorage::RequestForSession& request_for_session) - TSA_REQUIRES(storage_and_responses_lock); + const KeeperStorage::RequestForSession& request_for_session); }; } diff --git a/src/Coordination/KeeperStorage.cpp b/src/Coordination/KeeperStorage.cpp index d6225baaf4c..1e53a664d1b 100644 --- a/src/Coordination/KeeperStorage.cpp +++ b/src/Coordination/KeeperStorage.cpp @@ -40,6 +40,8 @@ namespace ProfileEvents extern const Event KeeperGetRequest; extern const Event KeeperListRequest; extern const Event KeeperExistsRequest; + extern const Event KeeperPreprocessElapsedMicroseconds; + extern const Event KeeperProcessElapsedMicroseconds; } namespace DB @@ -2309,6 +2311,20 @@ void KeeperStorage::preprocessRequest( std::optional digest, int64_t log_idx) { + Stopwatch watch; + SCOPE_EXIT({ + auto elapsed = watch.elapsedMicroseconds(); + if (auto elapsed_ms = elapsed / 1000; elapsed_ms > keeper_context->getCoordinationSettings()->log_slow_cpu_threshold_ms) + { + LOG_INFO( + getLogger("KeeperStorage"), + "Preprocessing a request took too long ({}ms).\nRequest info: {}", + elapsed_ms, + zk_request->toString(/*short_format=*/true)); + } + ProfileEvents::increment(ProfileEvents::KeeperPreprocessElapsedMicroseconds, watch.elapsedMicroseconds()); + }); + if (!initialized) throw Exception(ErrorCodes::LOGICAL_ERROR, "KeeperStorage system nodes are not initialized"); @@ -2409,6 +2425,20 @@ KeeperStorage::ResponsesForSessions KeeperStorage::processRequest( bool check_acl, bool is_local) { + Stopwatch watch; + SCOPE_EXIT({ + auto elapsed = watch.elapsedMicroseconds(); + if (auto elapsed_ms = elapsed / 1000; elapsed_ms > keeper_context->getCoordinationSettings()->log_slow_cpu_threshold_ms) + { + LOG_INFO( + getLogger("KeeperStorage"), + "Processing a request took too long ({}ms).\nRequest info: {}", + elapsed_ms, + zk_request->toString(/*short_format=*/true)); + } + ProfileEvents::increment(ProfileEvents::KeeperProcessElapsedMicroseconds, watch.elapsedMicroseconds()); + }); + if (!initialized) throw Exception(ErrorCodes::LOGICAL_ERROR, "KeeperStorage system nodes are not initialized"); diff --git a/src/Coordination/SnapshotableHashTable.h b/src/Coordination/SnapshotableHashTable.h index 70858930115..5f2b14e17b0 100644 --- a/src/Coordination/SnapshotableHashTable.h +++ b/src/Coordination/SnapshotableHashTable.h @@ -3,6 +3,7 @@ #include #include +#include namespace DB { diff --git a/src/Server/KeeperTCPHandler.cpp b/src/Server/KeeperTCPHandler.cpp index 4612e2e9fa8..47064b467e7 100644 --- a/src/Server/KeeperTCPHandler.cpp +++ b/src/Server/KeeperTCPHandler.cpp @@ -13,11 +13,9 @@ #include #include #include -#include #include #include #include -#include #include #include #include @@ -30,6 +28,11 @@ #include #endif +namespace ProfileEvents +{ + extern const Event KeeperTotalElapsedMicroseconds; +} + namespace DB { @@ -411,12 +414,12 @@ void KeeperTCPHandler::runImpl() keeper_dispatcher->registerSession(session_id, response_callback); Stopwatch logging_stopwatch; + auto operation_max_ms = keeper_dispatcher->getKeeperContext()->getCoordinationSettings()->log_slow_connection_operation_threshold_ms; auto log_long_operation = [&](const String & operation) { - constexpr UInt64 operation_max_ms = 500; auto elapsed_ms = logging_stopwatch.elapsedMilliseconds(); if (operation_max_ms < elapsed_ms) - LOG_TEST(log, "{} for session {} took {} ms", 
operation, session_id, elapsed_ms);
+            LOG_INFO(log, "{} for session {} took {} ms", operation, session_id, elapsed_ms);
         logging_stopwatch.restart();
     };

@@ -611,11 +614,13 @@ void KeeperTCPHandler::updateStats(Coordination::ZooKeeperResponsePtr & response
     /// update statistics ignoring watch response and heartbeat.
     if (response->xid != Coordination::WATCH_XID && response->getOpNum() != Coordination::OpNum::Heartbeat)
     {
-        Int64 elapsed = (Poco::Timestamp() - operations[response->xid]) / 1000;
-        conn_stats.updateLatency(elapsed);
+        Int64 elapsed = (Poco::Timestamp() - operations[response->xid]);
+        ProfileEvents::increment(ProfileEvents::KeeperTotalElapsedMicroseconds, elapsed);
+        Int64 elapsed_ms = elapsed / 1000;
+        conn_stats.updateLatency(elapsed_ms);

         operations.erase(response->xid);
-        keeper_dispatcher->updateKeeperStatLatency(elapsed);
+        keeper_dispatcher->updateKeeperStatLatency(elapsed_ms);

         last_op.set(std::make_unique(LastOp{
             .name = Coordination::toString(response->getOpNum()),

From 4f2a0e8eeb0eccdb596a56167626d1561a0e4d70 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin
Date: Mon, 1 Jul 2024 11:50:09 +0200
Subject: [PATCH 072/140] Fix possible issues with MySQL server protocol TLS
 connections

The problem here is that the retries (in SecureSocketImpl.cpp::mustRetry())
rely on a non-zero socket timeout, but the MySQL handler does not set
timeouts for the socket (unlike the other handlers), so when OpenSSL returns
SSL_ERROR_WANT_READ/SSL_ERROR_WANT_WRITE, the connection is simply
terminated.

I've verified this patch by hacking the openssl sources:

    diff --git a/crypto/bio/bss_sock.c b/crypto/bio/bss_sock.c
    index 82f7be85ae..a399291ff4 100644
    --- a/crypto/bio/bss_sock.c
    +++ b/crypto/bio/bss_sock.c
    @@ -124,7 +125,18 @@ static int sock_read(BIO *b, char *out, int outl)
                 ret = ktls_read_record(b->num, out, outl);
             else
     # endif
    -            ret = readsocket(b->num, out, outl);
    +        {
    +            /* pthread_kill(pthread_self(), SIGUSR1); */
    +            static int i = 0;
    +            if (!(++i % 2))
    +            {
    +                fprintf(stderr, "sock_read: inject EAGAIN\n");
    +                ret = -1;
    +                errno = EAGAIN;
    +            }
    +            else
    +                ret = readsocket(b->num, out, outl);
    +        }
             BIO_clear_retry_flags(b);
             if (ret <= 0) {
                 if (BIO_sock_should_retry(ret))

And after this patch the following succeeds without errors:

    ch benchmark -c10 -q "SELECT * FROM mysql('127.0.0.1:9004', system, one, 'default', '', SETTINGS connection_pool_size=1, connect_timeout = 100, connection_wait_timeout = 100)"

Note that this also fixes the timeouts for plain (non-TLS) connections.

Signed-off-by: Azat Khuzhin
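For context, the retry logic mentioned above follows roughly this pattern (a simplified sketch of the loop in SecureSocketImpl.cpp, not the verbatim Poco code):

    int rc;
    Poco::Timespan remaining_time = getMaxTimeout(); /// max(SO_SNDTIMEO, SO_RCVTIMEO) of the underlying socket
    do
    {
        /// RemainingTimeCounter subtracts the time spent in this iteration from remaining_time
        RemainingTimeCounter counter(remaining_time);
        rc = SSL_read(_pSSL, buffer, length);
    }
    while (mustRetry(rc, remaining_time)); /// on SSL_ERROR_WANT_READ/WRITE, polls the socket within remaining_time

Since the MySQL handler never set the timeouts, remaining_time started at zero, the poll had no budget, and the first WANT_READ/WANT_WRITE dropped the connection; the change below simply gives the socket proper timeouts.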
---
 src/Server/MySQLHandler.cpp | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/src/Server/MySQLHandler.cpp b/src/Server/MySQLHandler.cpp
index 9471509ad4b..c0f015bfcd5 100644
--- a/src/Server/MySQLHandler.cpp
+++ b/src/Server/MySQLHandler.cpp
@@ -204,6 +204,10 @@ void MySQLHandler::run()

     session->setClientConnectionId(connection_id);

+    const Settings & settings = server.context()->getSettingsRef();
+    socket().setReceiveTimeout(settings.receive_timeout);
+    socket().setSendTimeout(settings.send_timeout);
+
     in = std::make_shared(socket(), read_event);
     out = std::make_shared(socket(), write_event);
     packet_endpoint = std::make_shared(*in, *out, sequence_id);
@@ -451,6 +455,7 @@ void MySQLHandler::comQuery(ReadBuffer & payload, bool binary_protocol)

         // Settings replacements
         if (!should_replace)
+        {
             for (auto const & [mysql_setting, clickhouse_setting] : settings_replacements)
             {
                 const auto replacement_query_opt = setSettingReplacementQuery(query, mysql_setting, clickhouse_setting);
@@ -461,6 +466,7 @@ void MySQLHandler::comQuery(ReadBuffer & payload, bool binary_protocol)
                     break;
                 }
             }
+        }

         auto query_context = session->makeQueryContext();
         query_context->setCurrentQueryId(fmt::format("mysql:{}:{}", connection_id, toString(UUIDHelpers::generateV4())));
@@ -470,6 +476,10 @@ void MySQLHandler::comQuery(ReadBuffer & payload, bool binary_protocol)
             settings.prefer_column_name_to_alias = true;
             query_context->setSettings(settings);

+            /// Update timeouts
+            socket().setReceiveTimeout(settings.receive_timeout);
+            socket().setSendTimeout(settings.send_timeout);
+
             CurrentThread::QueryScope query_scope{query_context};

             std::atomic affected_rows {0};
@@ -643,7 +653,11 @@ void MySQLHandlerSSL::finishHandshakeSSL(
     client_capabilities = ssl_request.capability_flags;
     max_packet_size = ssl_request.max_packet_size ? ssl_request.max_packet_size : MAX_PACKET_LENGTH;
     secure_connection = true;
+
     ss = std::make_shared(SecureStreamSocket::attach(socket(), SSLManager::instance().defaultServerContext()));
+    ss->setReceiveTimeout(socket().getReceiveTimeout());
+    ss->setSendTimeout(socket().getSendTimeout());
+
     in = std::make_shared(*ss);
     out = std::make_shared(*ss);
     sequence_id = 2;

From 7361407ea50e6cc22ced76527634b3404a99642c Mon Sep 17 00:00:00 2001
From: Antonio Andelic
Date: Mon, 1 Jul 2024 15:25:00 +0200
Subject: [PATCH 073/140] Fix test

---
 src/Coordination/CoordinationSettings.cpp | 17 +++++++++++++++++
 .../test_keeper_four_word_command/test.py | 10 ++++++++++
 2 files changed, 27 insertions(+)

diff --git a/src/Coordination/CoordinationSettings.cpp b/src/Coordination/CoordinationSettings.cpp
index 05f691ca76b..d72d39fd7e1 100644
--- a/src/Coordination/CoordinationSettings.cpp
+++ b/src/Coordination/CoordinationSettings.cpp
@@ -169,6 +169,23 @@ void KeeperConfigurationAndSettings::dump(WriteBufferFromOwnString & buf) const

     writeText("async_replication=", buf);
     write_bool(coordination_settings->async_replication);
+
+    writeText("latest_logs_cache_size_threshold=", buf);
+    write_int(coordination_settings->latest_logs_cache_size_threshold);
+    writeText("commit_logs_cache_size_threshold=", buf);
+    write_int(coordination_settings->commit_logs_cache_size_threshold);
+
+    writeText("disk_move_retries_wait_ms=", buf);
+    write_int(coordination_settings->disk_move_retries_wait_ms);
+    writeText("disk_move_retries_during_init=", buf);
+    write_int(coordination_settings->disk_move_retries_during_init);
+
+    writeText("log_slow_total_threshold_ms=", buf);
+    write_int(coordination_settings->log_slow_total_threshold_ms);
+    writeText("log_slow_cpu_threshold_ms=", buf);
+    write_int(coordination_settings->log_slow_cpu_threshold_ms);
+    writeText("log_slow_connection_operation_threshold_ms=", buf);
+    write_int(coordination_settings->log_slow_connection_operation_threshold_ms);
 }

 KeeperConfigurationAndSettingsPtr
diff --git a/tests/integration/test_keeper_four_word_command/test.py b/tests/integration/test_keeper_four_word_command/test.py
index 44b2b50673a..a3a059c1dcb 100644
--- a/tests/integration/test_keeper_four_word_command/test.py
+++ b/tests/integration/test_keeper_four_word_command/test.py
@@ -293,6 +293,16 @@ def test_cmd_conf(started_cluster):
         assert result["configuration_change_tries_count"] == "20"

         assert result["async_replication"] == "true"
+
+        assert result["latest_logs_cache_size_threshold"] == "1073741824"
+        assert result["commit_logs_cache_size_threshold"] == "524288000"
+
+        assert result["disk_move_retries_wait_ms"] == "1000"
+        assert result["disk_move_retries_during_init"] == "100"
+
+        assert 
result["log_slow_total_threshold_ms"] == "5000" + assert result["log_slow_cpu_threshold_ms"] == "5000" + assert result["log_slow_connection_operation_threshold_ms"] == "1000" finally: close_keeper_socket(client) From 5e7a8245c467dbc40344bda67760ad1d33535994 Mon Sep 17 00:00:00 2001 From: kssenii Date: Mon, 1 Jul 2024 15:40:49 +0200 Subject: [PATCH 074/140] Fix memory leak --- .../ObjectStorage/DataLakes/DeltaLakeMetadata.cpp | 12 +++++++----- .../ObjectStorage/DataLakes/DeltaLakeMetadata.h | 3 --- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp b/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp index 3b6cbca5d46..bc64ef15cf1 100644 --- a/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp +++ b/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp @@ -51,8 +51,10 @@ namespace ErrorCodes extern const int NOT_IMPLEMENTED; } -struct DeltaLakeMetadata::Impl +struct DeltaLakeMetadataImpl { + using ConfigurationPtr = DeltaLakeMetadata::ConfigurationPtr; + ObjectStoragePtr object_storage; ConfigurationPtr configuration; ContextPtr context; @@ -61,7 +63,7 @@ struct DeltaLakeMetadata::Impl * Useful links: * - https://github.com/delta-io/delta/blob/master/PROTOCOL.md#data-files */ - Impl(ObjectStoragePtr object_storage_, + DeltaLakeMetadataImpl(ObjectStoragePtr object_storage_, ConfigurationPtr configuration_, ContextPtr context_) : object_storage(object_storage_) @@ -586,14 +588,14 @@ DeltaLakeMetadata::DeltaLakeMetadata( ObjectStoragePtr object_storage_, ConfigurationPtr configuration_, ContextPtr context_) - : impl(std::make_unique(object_storage_, configuration_, context_)) { - auto result = impl->processMetadataFiles(); + auto impl = DeltaLakeMetadataImpl(object_storage_, configuration_, context_); + auto result = impl.processMetadataFiles(); data_files = result.data_files; schema = result.schema; partition_columns = result.partition_columns; - LOG_TRACE(impl->log, "Found {} data files, {} partition files, schema: {}", + LOG_TRACE(impl.log, "Found {} data files, {} partition files, schema: {}", data_files.size(), partition_columns.size(), schema.toString()); } diff --git a/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.h b/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.h index 926bd1b451d..a479a3dd293 100644 --- a/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.h +++ b/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.h @@ -45,10 +45,7 @@ public: } private: - struct Impl; - const std::shared_ptr impl; mutable Strings data_files; - NamesAndTypesList schema; std::unordered_map column_name_to_physical_name; DataLakePartitionColumns partition_columns; From 00b030be3ef9ace2d5dc7d5403e2cfbecb5d96eb Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Mon, 1 Jul 2024 13:41:03 +0000 Subject: [PATCH 075/140] Better --- tests/queries/0_stateless/01825_type_json_from_map.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01825_type_json_from_map.sql b/tests/queries/0_stateless/01825_type_json_from_map.sql index 7cad50b363b..44c0417a38e 100644 --- a/tests/queries/0_stateless/01825_type_json_from_map.sql +++ b/tests/queries/0_stateless/01825_type_json_from_map.sql @@ -1,4 +1,4 @@ --- Tags: no-fasttest, no-random-merge-tree-settings +-- Tags: no-fasttest, no-random-settings, no-random-merge-tree-settings -- For example, it is 4 times slower with --merge_max_block_size=5967 --index_granularity=55 --min_bytes_for_wide_part=847510133 DROP TABLE 
IF EXISTS t_json; From 5541461b991c8fe17414c7d663c7aa7ff28136ae Mon Sep 17 00:00:00 2001 From: kssenii Date: Mon, 1 Jul 2024 16:04:21 +0200 Subject: [PATCH 076/140] Fix time of writing to _log table --- .../ObjectStorageQueue/ObjectStorageQueueSource.cpp | 13 ++++++------- .../ObjectStorageQueue/ObjectStorageQueueSource.h | 7 ++++++- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.cpp b/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.cpp index 2effbe7e7c2..955e49bc2bf 100644 --- a/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.cpp +++ b/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.cpp @@ -469,8 +469,6 @@ Chunk ObjectStorageQueueSource::generateImpl() LOG_ERROR(log, "Failed to set file {} as failed: {}", object_info->relative_path, getCurrentExceptionMessage(true)); } - - appendLogElement(reader.getObjectInfo()->getPath(), *file_status, processed_rows_from_file, false); } LOG_TEST(log, "Query is cancelled"); @@ -502,8 +500,6 @@ Chunk ObjectStorageQueueSource::generateImpl() object_info->relative_path, getCurrentExceptionMessage(true)); } - appendLogElement(path, *file_status, processed_rows_from_file, false); - /// Leave the file half processed. Table is being dropped, so we do not care. break; } @@ -548,7 +544,6 @@ Chunk ObjectStorageQueueSource::generateImpl() failed_during_read_files.push_back(file_metadata); file_status->onFailed(getCurrentExceptionMessage(true)); - appendLogElement(path, *file_status, processed_rows_from_file, false); if (processed_rows_from_file == 0) { @@ -567,8 +562,6 @@ Chunk ObjectStorageQueueSource::generateImpl() throw; } - appendLogElement(path, *file_status, processed_rows_from_file, true); - file_status->setProcessingEndTime(); file_status.reset(); @@ -663,10 +656,14 @@ void ObjectStorageQueueSource::commit(bool success, const std::string & exceptio applyActionAfterProcessing(file_metadata->getPath()); } else + { file_metadata->setFailed( exception_message, /* reduce_retry_count */false, /* overwrite_status */true); + + } + appendLogElement(file_metadata->getPath(), *file_metadata->getFileStatus(), processed_rows_from_file, /* processed */success); } for (const auto & file_metadata : failed_during_read_files) @@ -677,6 +674,8 @@ void ObjectStorageQueueSource::commit(bool success, const std::string & exceptio file_metadata->getFileStatus()->getException(), /* reduce_retry_count */true, /* overwrite_status */false); + + appendLogElement(file_metadata->getPath(), *file_metadata->getFileStatus(), processed_rows_from_file, /* processed */false); } } diff --git a/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.h b/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.h index 50428ed5f4b..ccd87e8a269 100644 --- a/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.h +++ b/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.h @@ -160,7 +160,12 @@ private: Chunk generateImpl(); void applyActionAfterProcessing(const String & path); - void appendLogElement(const std::string & filename, ObjectStorageQueueMetadata::FileStatus & file_status_, size_t processed_rows, bool processed); + void appendLogElement( + const std::string & filename, + ObjectStorageQueueMetadata::FileStatus & file_status_, + size_t processed_rows, + bool processed); + void lazyInitialize(size_t processor); }; From f596f0f66aa571afe3d762d938a812f52a8a9766 Mon Sep 17 00:00:00 2001 From: Han Fei Date: Sun, 30 Jun 2024 23:59:08 +0200 Subject: [PATCH 077/140] add restriction for 
storage join --- src/Storages/StorageJoin.cpp | 5 ++- ...join_strictness_type_restriction.reference | 0 ...orage_join_strictness_type_restriction.sql | 42 +++++++++++++++++++ 3 files changed, 46 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/03197_storage_join_strictness_type_restriction.reference create mode 100644 tests/queries/0_stateless/03197_storage_join_strictness_type_restriction.sql diff --git a/src/Storages/StorageJoin.cpp b/src/Storages/StorageJoin.cpp index d12e5b1a20b..eb58b9ec3f8 100644 --- a/src/Storages/StorageJoin.cpp +++ b/src/Storages/StorageJoin.cpp @@ -395,11 +395,14 @@ void registerStorageJoin(StorageFactory & factory) else if (kind_str == "full") { if (strictness == JoinStrictness::Any) - strictness = JoinStrictness::RightAny; + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "ANY FULL JOINs are not implemented"); kind = JoinKind::Full; } } + if ((strictness == JoinStrictness::Semi || strictness == JoinStrictness::Anti) && (kind != JoinKind::Left && kind != JoinKind::Right)) + throw Exception(ErrorCodes::BAD_ARGUMENTS, " SEMI|ANTI JOIN should be LEFT or RIGHT"); + if (kind == JoinKind::Comma) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Second parameter of storage Join must be LEFT or INNER or RIGHT or FULL (without quotes)."); diff --git a/tests/queries/0_stateless/03197_storage_join_strictness_type_restriction.reference b/tests/queries/0_stateless/03197_storage_join_strictness_type_restriction.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03197_storage_join_strictness_type_restriction.sql b/tests/queries/0_stateless/03197_storage_join_strictness_type_restriction.sql new file mode 100644 index 00000000000..1c52f79db11 --- /dev/null +++ b/tests/queries/0_stateless/03197_storage_join_strictness_type_restriction.sql @@ -0,0 +1,42 @@ +DROP TABLE IF EXISTS t1; +CREATE TABLE t1 +( + a Int64, + b Int64 +) Engine = Join(SEMI, ALL, a); -- { serverError BAD_ARGUMENTS } + +CREATE TABLE t1 +( + a Int64, + b Int64 +) Engine = Join(SEMI, INNER, a); -- { serverError BAD_ARGUMENTS } + +CREATE TABLE t1 +( + a Int64, + b Int64 +) Engine = Join(SEMI, OUTER, a); -- { serverError BAD_ARGUMENTS } + +CREATE TABLE t1 +( + a Int64, + b Int64 +) Engine = Join(ANTI, ALL, a); -- { serverError BAD_ARGUMENTS } + +CREATE TABLE t1 +( + a Int64, + b Int64 +) Engine = Join(ANTI, INNER, a); -- { serverError BAD_ARGUMENTS } + +CREATE TABLE t1 +( + a Int64, + b Int64 +) Engine = Join(ANTI, OUTER, a); -- { serverError BAD_ARGUMENTS } + +CREATE TABLE t1 +( + a Int64, + b Int64 +) Engine = Join(ANY, OUTER, a); -- { serverError BAD_ARGUMENTS } From cc37cbdd176867ab444f22d58a2feb5297ef952c Mon Sep 17 00:00:00 2001 From: Han Fei Date: Mon, 1 Jul 2024 17:03:27 +0200 Subject: [PATCH 078/140] refine tests --- .../03197_storage_join_strictness_type_restriction.sql | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/queries/0_stateless/03197_storage_join_strictness_type_restriction.sql b/tests/queries/0_stateless/03197_storage_join_strictness_type_restriction.sql index 1c52f79db11..5aa3e4c2e0c 100644 --- a/tests/queries/0_stateless/03197_storage_join_strictness_type_restriction.sql +++ b/tests/queries/0_stateless/03197_storage_join_strictness_type_restriction.sql @@ -15,7 +15,7 @@ CREATE TABLE t1 ( a Int64, b Int64 -) Engine = Join(SEMI, OUTER, a); -- { serverError BAD_ARGUMENTS } +) Engine = Join(SEMI, FULL, a); -- { serverError BAD_ARGUMENTS } CREATE TABLE t1 ( @@ -33,10 +33,10 @@ CREATE TABLE t1 ( a Int64, b 
Int64 -) Engine = Join(ANTI, OUTER, a); -- { serverError BAD_ARGUMENTS } +) Engine = Join(ANTI, FULL, a); -- { serverError BAD_ARGUMENTS } CREATE TABLE t1 ( a Int64, b Int64 -) Engine = Join(ANY, OUTER, a); -- { serverError BAD_ARGUMENTS } +) Engine = Join(ANY, FULL, a); -- { serverError NOT_IMPLEMENTED } From e439921d373d0f73067600340b53c722fd4fcef8 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Mon, 1 Jul 2024 15:31:32 +0000 Subject: [PATCH 079/140] slightly better --- src/Storages/MergeTree/IMergeTreeDataPart.cpp | 6 ++++++ src/Storages/MergeTree/IMergeTreeDataPart.h | 1 + src/Storages/MergeTree/MergeTreeData.cpp | 4 +++- 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index e92a608eed0..bdea46a8210 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -375,6 +375,12 @@ void IMergeTreeDataPart::unloadIndex() index_loaded = false; } +bool IMergeTreeDataPart::isIndexLoaded() const +{ + std::scoped_lock lock(index_mutex); + return index_loaded; +} + void IMergeTreeDataPart::setName(const String & new_name) { mutable_name = new_name; diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.h b/src/Storages/MergeTree/IMergeTreeDataPart.h index c9b3ec26df0..571f1389e10 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.h +++ b/src/Storages/MergeTree/IMergeTreeDataPart.h @@ -369,6 +369,7 @@ public: void setIndex(const Columns & cols_); void setIndex(Columns && cols_); void unloadIndex(); + bool isIndexLoaded() const; /// For data in RAM ('index') UInt64 getIndexSizeInBytes() const; diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 4add66e1ecd..9d98fbeda20 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -8622,7 +8622,9 @@ size_t MergeTreeData::unloadPrimaryKeysOfOutdatedParts() for (const auto & part : parts_range) { /// Outdated part may be hold by SELECT query and still needs the index. - if (part.unique()) + /// This check requires lock of index_mutex but if outdated part is unique then there is no + /// contention on it, so it's relatively cheap and it's ok to check under a global parts lock. 
+ if (part.unique() && part->isIndexLoaded()) parts_to_unload_index.push_back(part); } } From adcaf117a1b2987aa47c08d06e5db0c177a191b8 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Mon, 1 Jul 2024 16:38:39 +0100 Subject: [PATCH 080/140] impl --- src/Processors/QueryPlan/ReadFromMergeTree.cpp | 6 +++--- src/Storages/MergeTree/MergeTreeSource.cpp | 9 ++++----- src/Storages/MergeTree/MergeTreeSource.h | 3 ++- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index 0dacdc0b958..b35a2e6f220 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -382,7 +382,7 @@ Pipe ReadFromMergeTree::readFromPoolParallelReplicas( pool, std::move(algorithm), prewhere_info, actions_settings, block_size_copy, reader_settings); - auto source = std::make_shared(std::move(processor)); + auto source = std::make_shared(std::move(processor), data.getLogName()); pipes.emplace_back(std::move(source)); } @@ -481,7 +481,7 @@ Pipe ReadFromMergeTree::readFromPool( pool, std::move(algorithm), prewhere_info, actions_settings, block_size_copy, reader_settings); - auto source = std::make_shared(std::move(processor)); + auto source = std::make_shared(std::move(processor), data.getLogName()); if (i == 0) source->addTotalRowsApprox(total_rows); @@ -593,7 +593,7 @@ Pipe ReadFromMergeTree::readInOrder( processor->addPartLevelToChunk(isQueryWithFinal()); - auto source = std::make_shared(std::move(processor)); + auto source = std::make_shared(std::move(processor), data.getLogName()); if (set_rows_approx) source->addTotalRowsApprox(total_rows); diff --git a/src/Storages/MergeTree/MergeTreeSource.cpp b/src/Storages/MergeTree/MergeTreeSource.cpp index fcf2dd76e3f..e323b9f9ee7 100644 --- a/src/Storages/MergeTree/MergeTreeSource.cpp +++ b/src/Storages/MergeTree/MergeTreeSource.cpp @@ -133,9 +133,8 @@ private: }; #endif -MergeTreeSource::MergeTreeSource(MergeTreeSelectProcessorPtr processor_) - : ISource(processor_->getHeader()) - , processor(std::move(processor_)) +MergeTreeSource::MergeTreeSource(MergeTreeSelectProcessorPtr processor_, const std::string & log_name_) + : ISource(processor_->getHeader()), processor(std::move(processor_)), log_name(log_name_) { #if defined(OS_LINUX) if (processor->getSettings().use_asynchronous_read_from_pool) @@ -207,7 +206,7 @@ std::optional MergeTreeSource::tryGenerate() try { - OpenTelemetry::SpanHolder span{"MergeTreeSource::tryGenerate()"}; + OpenTelemetry::SpanHolder span{fmt::format("MergeTreeSource({})::tryGenerate", log_name)}; holder->setResult(processor->read()); } catch (...) 
@@ -222,7 +221,7 @@ std::optional MergeTreeSource::tryGenerate()
     }
 #endif

-    OpenTelemetry::SpanHolder span{"MergeTreeSource::tryGenerate()"};
+    OpenTelemetry::SpanHolder span{fmt::format("MergeTreeSource({})::tryGenerate", log_name)};
     return processReadResult(processor->read());
 }

diff --git a/src/Storages/MergeTree/MergeTreeSource.h b/src/Storages/MergeTree/MergeTreeSource.h
index 655f0ee6ebe..fc39b4f9b09 100644
--- a/src/Storages/MergeTree/MergeTreeSource.h
+++ b/src/Storages/MergeTree/MergeTreeSource.h
@@ -12,7 +12,7 @@ struct ChunkAndProgress;
 class MergeTreeSource final : public ISource
 {
 public:
-    explicit MergeTreeSource(MergeTreeSelectProcessorPtr processor_);
+    explicit MergeTreeSource(MergeTreeSelectProcessorPtr processor_, const std::string & log_name_);
     ~MergeTreeSource() override;

     std::string getName() const override;
@@ -30,6 +30,7 @@ protected:

 private:
     MergeTreeSelectProcessorPtr processor;
+    const std::string log_name;

 #if defined(OS_LINUX)
     struct AsyncReadingState;

From ef24f517892891b3b298dc33cbb27493fa4ba668 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin
Date: Mon, 1 Jul 2024 19:17:06 +0200
Subject: [PATCH 081/140] Fix handling of
 SSL_ERROR_WANT_READ/SSL_ERROR_WANT_WRITE with zero timeout

Previously, if you were using a socket without a timeout, it wasn't able to
handle SSL_ERROR_WANT_READ/SSL_ERROR_WANT_WRITE, and even though a socket
without timeouts is an odd thing (but it is possible - [1]), it still may
occur somewhere.

[1]: https://github.com/ClickHouse/ClickHouse/pull/65917

Signed-off-by: Azat Khuzhin
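For reference, this replicates the kernel's own semantics: a zero SO_SNDTIMEO/SO_RCVTIMEO on a blocking socket means "no deadline at all", not "fail immediately" (illustrative snippet, not ClickHouse code; needs <sys/socket.h> and <sys/time.h>):

    struct timeval tv{}; /// tv_sec == 0 && tv_usec == 0 disables the receive timeout
    setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv));
    /// recv() now blocks until data arrives or the peer closes, with no deadline
    ssize_t n = recv(fd, buf, sizeof(buf), 0);

The patch below makes the SSL retry path behave the same way by substituting a large finite limit for a zero timeout before polling.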
---
 .../include/Poco/Net/SecureSocketImpl.h       |  5 +--
 .../NetSSL_OpenSSL/src/SecureSocketImpl.cpp   | 31 +++++++++++++------
 2 files changed, 24 insertions(+), 12 deletions(-)

diff --git a/base/poco/NetSSL_OpenSSL/include/Poco/Net/SecureSocketImpl.h b/base/poco/NetSSL_OpenSSL/include/Poco/Net/SecureSocketImpl.h
index 49c12b6b45f..890752c52da 100644
--- a/base/poco/NetSSL_OpenSSL/include/Poco/Net/SecureSocketImpl.h
+++ b/base/poco/NetSSL_OpenSSL/include/Poco/Net/SecureSocketImpl.h
@@ -235,8 +235,6 @@ namespace Net
         /// Note that simply closing a socket is not sufficient
         /// to be able to re-use it again.

-        Poco::Timespan getMaxTimeout();
-
     private:
         SecureSocketImpl(const SecureSocketImpl &);
         SecureSocketImpl & operator=(const SecureSocketImpl &);
@@ -250,6 +248,9 @@ namespace Net
         Session::Ptr _pSession;

         friend class SecureStreamSocketImpl;
+
+        Poco::Timespan getMaxTimeoutOrLimit();
+        /// Return max(send, receive) if non-zero, otherwise the maximum timeout
     };

diff --git a/base/poco/NetSSL_OpenSSL/src/SecureSocketImpl.cpp b/base/poco/NetSSL_OpenSSL/src/SecureSocketImpl.cpp
index efe25f65909..4873d259ae5 100644
--- a/base/poco/NetSSL_OpenSSL/src/SecureSocketImpl.cpp
+++ b/base/poco/NetSSL_OpenSSL/src/SecureSocketImpl.cpp
@@ -199,7 +199,7 @@ void SecureSocketImpl::connectSSL(bool performHandshake)
     if (performHandshake && _pSocket->getBlocking())
     {
         int ret;
-        Poco::Timespan remaining_time = getMaxTimeout();
+        Poco::Timespan remaining_time = getMaxTimeoutOrLimit();
         do
         {
             RemainingTimeCounter counter(remaining_time);
@@ -302,7 +302,7 @@ int SecureSocketImpl::sendBytes(const void* buffer, int length, int flags)
         return rc;
     }

-    Poco::Timespan remaining_time = getMaxTimeout();
+    Poco::Timespan remaining_time = getMaxTimeoutOrLimit();
     do
     {
         RemainingTimeCounter counter(remaining_time);
@@ -338,7 +338,7 @@ int SecureSocketImpl::receiveBytes(void* buffer, int length, int flags)
         return rc;
     }

-    Poco::Timespan remaining_time = getMaxTimeout();
+    Poco::Timespan remaining_time = getMaxTimeoutOrLimit();
     do
     {
         /// SSL record may consist of several TCP packets,
@@ -372,7 +372,7 @@ int SecureSocketImpl::completeHandshake()
     poco_check_ptr (_pSSL);

     int rc;
-    Poco::Timespan remaining_time = getMaxTimeout();
+    Poco::Timespan remaining_time = getMaxTimeoutOrLimit();
     do
     {
         RemainingTimeCounter counter(remaining_time);
@@ -453,18 +453,29 @@ X509* SecureSocketImpl::peerCertificate() const
         return 0;
 }

-Poco::Timespan SecureSocketImpl::getMaxTimeout()
+Poco::Timespan SecureSocketImpl::getMaxTimeoutOrLimit()
 {
     std::lock_guard lock(_mutex);
     Poco::Timespan remaining_time = _pSocket->getReceiveTimeout();
     Poco::Timespan send_timeout = _pSocket->getSendTimeout();
     if (remaining_time < send_timeout)
         remaining_time = send_timeout;
+    /// zero SO_SNDTIMEO/SO_RCVTIMEO works as no timeout, let's replicate this
+    ///
+    /// NOTE: we cannot use INT64_MAX (std::numeric_limits::max()),
+    /// since it will be later passed to poll() which accepts an int timeout, and
+    /// even though poll() accepts milliseconds and Timespan() accepts
+    /// microseconds, let's use a smaller maximum value just to avoid some possible
+    /// issues, this should be enough anyway (it is ~24 days).
+ if (remaining_time == 0)
+ remaining_time = Poco::Timespan(std::numeric_limits::max());
 return remaining_time;
 }

 bool SecureSocketImpl::mustRetry(int rc, Poco::Timespan& remaining_time)
 {
+ if (remaining_time == 0)
+ return false;
 std::lock_guard lock(_mutex);
 if (rc <= 0)
 {
@@ -475,9 +486,7 @@ bool SecureSocketImpl::mustRetry(int rc, Poco::Timespan& remaining_time)
 case SSL_ERROR_WANT_READ:
 if (_pSocket->getBlocking())
 {
- /// Level-triggered mode of epoll_wait is used, so if SSL_read don't read all available data from socket,
- /// epoll_wait returns true without waiting for new data even if remaining_time == 0
- if (_pSocket->pollImpl(remaining_time, Poco::Net::Socket::SELECT_READ) && remaining_time != 0)
+ if (_pSocket->pollImpl(remaining_time, Poco::Net::Socket::SELECT_READ))
 return true;
 else
 throw Poco::TimeoutException();
@@ -486,13 +495,15 @@ bool SecureSocketImpl::mustRetry(int rc, Poco::Timespan& remaining_time)
 case SSL_ERROR_WANT_WRITE:
 if (_pSocket->getBlocking())
 {
- /// The same as for SSL_ERROR_WANT_READ
- if (_pSocket->pollImpl(remaining_time, Poco::Net::Socket::SELECT_WRITE) && remaining_time != 0)
+ if (_pSocket->pollImpl(remaining_time, Poco::Net::Socket::SELECT_WRITE))
 return true;
 else
 throw Poco::TimeoutException();
 }
 break;
+ /// NOTE: POCO_EINTR is the same as SSL_ERROR_WANT_READ (at least in
+ /// OpenSSL), so this is likely dead code, but let's leave it for
+ /// compatibility with other implementations
 case SSL_ERROR_SYSCALL:
 return socketError == POCO_EAGAIN || socketError == POCO_EINTR;
 default:

From ac648a1fe35146e02d571ce2783a1e1ce9010a21 Mon Sep 17 00:00:00 2001
From: Smita Kulkarni
Date: Mon, 1 Jul 2024 19:41:45 +0200
Subject: [PATCH 082/140] Fix build

---
 src/Storages/ObjectStorage/Azure/Configuration.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/Storages/ObjectStorage/Azure/Configuration.cpp b/src/Storages/ObjectStorage/Azure/Configuration.cpp
index f9a2a59e4a0..f763a997bfb 100644
--- a/src/Storages/ObjectStorage/Azure/Configuration.cpp
+++ b/src/Storages/ObjectStorage/Azure/Configuration.cpp
@@ -180,7 +180,7 @@ AzureClientPtr StorageAzureConfiguration::createClient(bool is_read_only, bool a
 }

 std::unique_ptr blob_service_client;
- size_t pos = configuration.connection_url.find('?');
+ size_t pos = connection_url.find('?');
 std::shared_ptr managed_identity_credential;
 if (storage_shared_key_credential)
 {
@@ -192,12 +192,12 @@ AzureClientPtr StorageAzureConfiguration::createClient(bool is_read_only, bool a
 if (pos == std::string::npos)
 {
 auto workload_identity_credential = std::make_shared();
- blob_service_client = std::make_unique(configuration.connection_url, workload_identity_credential);
+ blob_service_client = std::make_unique(connection_url, workload_identity_credential);
 }
 else
 {
 managed_identity_credential = std::make_shared();
- blob_service_client = std::make_unique(configuration.connection_url, managed_identity_credential);
+ blob_service_client = std::make_unique(connection_url, managed_identity_credential);
 }
 }

From 447c0db2bc1db2706598a5fe0eb81332b3e4f0ac Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?=
Date: Mon, 1 Jul 2024 20:11:19 +0200
Subject: [PATCH 083/140] Fix SettingsChangesHistory 24.7

---
 src/Core/SettingsChangesHistory.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp
index 194a0024f2b..65efc157741 100644
--- a/src/Core/SettingsChangesHistory.cpp
+++
b/src/Core/SettingsChangesHistory.cpp @@ -58,6 +58,7 @@ String ClickHouseVersion::toString() const static std::initializer_list> settings_changes_history_initializer = { {"24.7", {{"output_format_parquet_write_page_index", false, true, "Add a possibility to write page index into parquet files."}, + {"input_format_json_ignore_key_case", false, false, "Ignore json key case while read json field from string."}, }}, {"24.6", {{"materialize_skip_indexes_on_insert", true, true, "Added new setting to allow to disable materialization of skip indexes on insert"}, {"materialize_statistics_on_insert", true, true, "Added new setting to allow to disable materialization of statistics on insert"}, @@ -88,7 +89,6 @@ static std::initializer_list Date: Mon, 1 Jul 2024 21:55:45 +0200 Subject: [PATCH 084/140] Fix --- src/Interpreters/Cache/WriteBufferToFileSegment.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Interpreters/Cache/WriteBufferToFileSegment.cpp b/src/Interpreters/Cache/WriteBufferToFileSegment.cpp index e654d091561..dd038948adf 100644 --- a/src/Interpreters/Cache/WriteBufferToFileSegment.cpp +++ b/src/Interpreters/Cache/WriteBufferToFileSegment.cpp @@ -4,7 +4,7 @@ #include #include #include -#include +#include #include @@ -134,7 +134,7 @@ std::unique_ptr WriteBufferToFileSegment::getReadBufferImpl() if (file_segment->getDownloadedSize() > 0) return std::make_unique(file_segment->getPath()); else - return std::make_unique(); + return std::make_unique(); } } From 27e0e57054010446a530efc3eb02e85d09f7e9e2 Mon Sep 17 00:00:00 2001 From: kssenii Date: Mon, 1 Jul 2024 22:47:36 +0200 Subject: [PATCH 085/140] Use ReadBufferFromFileBase instead of ReadBufferFromFile for reread_buffer_from_file --- src/Storages/MergeTree/MergeTask.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Storages/MergeTree/MergeTask.cpp b/src/Storages/MergeTree/MergeTask.cpp index 7ab8fa2430a..c8f1a08128b 100644 --- a/src/Storages/MergeTree/MergeTask.cpp +++ b/src/Storages/MergeTree/MergeTask.cpp @@ -555,18 +555,18 @@ bool MergeTask::VerticalMergeStage::prepareVerticalMergeForAllColumns() const if (!reread_buf) throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot read temporary file {}", ctx->rows_sources_uncompressed_write_buf->getFileName()); - auto * reread_buffer_raw = dynamic_cast(reread_buf.get()); + auto * reread_buffer_raw = dynamic_cast(reread_buf.get()); if (!reread_buffer_raw) { const auto & reread_buf_ref = *reread_buf; - throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected ReadBufferFromFile, but got {}", demangle(typeid(reread_buf_ref).name())); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected ReadBufferFromFileBase, but got {}", demangle(typeid(reread_buf_ref).name())); } /// Move ownership from std::unique_ptr to std::unique_ptr for CompressedReadBufferFromFile. /// First, release ownership from unique_ptr to base type. reread_buf.release(); /// NOLINT(bugprone-unused-return-value,hicpp-ignored-remove-result): we already have the pointer value in `reread_buffer_raw` /// Then, move ownership to unique_ptr to concrete type. - std::unique_ptr reread_buffer_from_file(reread_buffer_raw); + std::unique_ptr reread_buffer_from_file(reread_buffer_raw); /// CompressedReadBufferFromFile expects std::unique_ptr as argument. 
ctx->rows_sources_read_buf = std::make_unique(std::move(reread_buffer_from_file)); From a5ada16719b9e74a3ac840bff360ddf830005d63 Mon Sep 17 00:00:00 2001 From: Michael Kolupaev Date: Tue, 2 Jul 2024 01:11:15 +0000 Subject: [PATCH 086/140] Use -Og instead of -O0 for debug builds --- CMakeLists.txt | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index c4f093b1c99..f796e6c4616 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -319,7 +319,6 @@ endif() # Disable floating-point expression contraction in order to get consistent floating point calculation results across platforms set (COMPILER_FLAGS "${COMPILER_FLAGS} -ffp-contract=off") -# Our built-in unwinder only supports DWARF version up to 4. set (DEBUG_INFO_FLAGS "-g") # Disable omit frame pointer compiler optimization using -fno-omit-frame-pointer @@ -333,15 +332,15 @@ endif() set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${COMPILER_FLAGS} ${CMAKE_CXX_FLAGS_ADD}") set (CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -O3 ${DEBUG_INFO_FLAGS} ${CMAKE_CXX_FLAGS_ADD}") -set (CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -O0 ${DEBUG_INFO_FLAGS} ${CMAKE_CXX_FLAGS_ADD}") +set (CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -Og ${DEBUG_INFO_FLAGS} ${CMAKE_CXX_FLAGS_ADD}") set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${COMPILER_FLAGS} ${CMAKE_C_FLAGS_ADD}") set (CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS_RELWITHDEBINFO} -O3 ${DEBUG_INFO_FLAGS} ${CMAKE_C_FLAGS_ADD}") -set (CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -O0 ${DEBUG_INFO_FLAGS} ${CMAKE_C_FLAGS_ADD}") +set (CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -Og ${DEBUG_INFO_FLAGS} ${CMAKE_C_FLAGS_ADD}") set (CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} ${COMPILER_FLAGS} ${CMAKE_ASM_FLAGS_ADD}") set (CMAKE_ASM_FLAGS_RELWITHDEBINFO "${CMAKE_ASM_FLAGS_RELWITHDEBINFO} -O3 ${DEBUG_INFO_FLAGS} ${CMAKE_ASM_FLAGS_ADD}") -set (CMAKE_ASM_FLAGS_DEBUG "${CMAKE_ASM_FLAGS_DEBUG} -O0 ${DEBUG_INFO_FLAGS} ${CMAKE_ASM_FLAGS_ADD}") +set (CMAKE_ASM_FLAGS_DEBUG "${CMAKE_ASM_FLAGS_DEBUG} -Og ${DEBUG_INFO_FLAGS} ${CMAKE_ASM_FLAGS_ADD}") if (OS_DARWIN) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -stdlib=libc++") From 13c3bab6240aea20ea1da1758806cb898a08a3b6 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Tue, 2 Jul 2024 09:54:37 +0800 Subject: [PATCH 087/140] update doc --- docs/en/sql-reference/functions/string-functions.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/docs/en/sql-reference/functions/string-functions.md b/docs/en/sql-reference/functions/string-functions.md index 6b95796db03..bd24d2307dc 100644 --- a/docs/en/sql-reference/functions/string-functions.md +++ b/docs/en/sql-reference/functions/string-functions.md @@ -571,6 +571,7 @@ SELECT upper('clickhouse'); Converts a string to lowercase, assuming that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined. +Does not detect the language, e.g. for Turkish the result might not be exactly correct (i/İ vs. i/I). If the length of the UTF-8 byte sequence is different for upper and lower case of a code point, the result may be incorrect for this code point. @@ -734,13 +735,13 @@ concat(s1, s2, ...) **Arguments** -Values of arbitrary type. Empty argument is also allowed. +Values of arbitrary type. Arguments which are not of types [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md) are converted to strings using their default serialization. 
As this decreases performance, it is not recommended to use non-String/FixedString arguments.

 **Returned values**

-The String created by concatenating the arguments. If there are no arguments, the function returns empty string.
+The String created by concatenating the arguments.

 If any of the arguments is `NULL`, the function returns `NULL`.

From 0bb374b5978b1769ee2d321ddf5f93289ae7ddb5 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Tue, 2 Jul 2024 06:19:21 +0200
Subject: [PATCH 088/140] Fix error (tnx, @jkartseva)

---
 .../ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp
index 6c0fc403f04..0ebe885a3e7 100644
--- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp
+++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp
@@ -149,7 +149,7 @@ ObjectStorageIteratorPtr AzureObjectStorage::iterate(const std::string & path_pr
 auto settings_ptr = settings.get();
 auto client_ptr = client.get();

- return std::make_shared(path_prefix, client_ptr, max_keys);
+ return std::make_shared(path_prefix, client_ptr, max_keys ? max_keys : settings_ptr->list_object_keys_size);
 }

 void AzureObjectStorage::listObjects(const std::string & path, RelativePathsWithMetadata & children, size_t max_keys) const

From 3b4d5b00de37bcfce48386e2d04f592553d04dd9 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin
Date: Sat, 22 Jun 2024 20:49:39 +0200
Subject: [PATCH 089/140] Re-configure yamllint to allow document-start

CI reported [1]:

    1:1 warning found forbidden document start "---" (document-start)

[1]: https://s3.amazonaws.com/clickhouse-test-reports/65563/27ef3066f685390227c84f31c7c95e1670f9960b/style_check.html

It was an error to forbid it; it was forbidden only by mistake, to silence
the errors.

Signed-off-by: Azat Khuzhin

---
 .yamllint | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/.yamllint b/.yamllint
index 9d6550ac960..f144e2d47b1 100644
--- a/.yamllint
+++ b/.yamllint
@@ -13,5 +13,4 @@ rules:
 level: warning
 comments:
 min-spaces-from-content: 1
- document-start:
- present: false
+ document-start: disable

From 2991e27183a7dfe0e60d944155759f15123d96a3 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin
Date: Sat, 22 Jun 2024 15:26:46 +0200
Subject: [PATCH 090/140] Parse user from URL for dashboard.html (useful for sharing)

Signed-off-by: Azat Khuzhin

---
 programs/server/dashboard.html | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/programs/server/dashboard.html b/programs/server/dashboard.html
index b21d4b86314..45f988f7b1e 100644
--- a/programs/server/dashboard.html
+++ b/programs/server/dashboard.html
@@ -506,6 +506,14 @@ let user = 'default';
 let password = '';
 let add_http_cors_header = (location.protocol != 'file:');

+const current_url = new URL(window.location);
+/// Substitute user name if it's specified in the query string
+const user_from_url = current_url.searchParams.get('user');
+if (user_from_url) {
+ user = user_from_url;
+}
+
+
 const errorCodeMessageMap = {
 516: 'Error authenticating with database. Please check your connection params and try again.'
}

From 93c1b5d8a72559517bafd45cf94e2e531e39a404 Mon Sep 17 00:00:00 2001
From: Pablo Marcos
Date: Tue, 2 Jul 2024 08:21:51 +0000
Subject: [PATCH 091/140] Address issues pointed out in the PR

---
 docs/en/sql-reference/functions/bit-functions.md | 6 +++---
 src/Functions/FunctionBitTestMany.h | 2 +-
 src/Functions/bitTest.cpp | 8 ++++----
 .../0_stateless/01082_bit_test_out_of_bound.reference | 3 +++
 tests/queries/0_stateless/01082_bit_test_out_of_bound.sql | 4 ++++
 5 files changed, 15 insertions(+), 8 deletions(-)

diff --git a/docs/en/sql-reference/functions/bit-functions.md b/docs/en/sql-reference/functions/bit-functions.md
index a48893b93bf..5ab7e07fcad 100644
--- a/docs/en/sql-reference/functions/bit-functions.md
+++ b/docs/en/sql-reference/functions/bit-functions.md
@@ -173,7 +173,7 @@ See function [substring](string-functions.md#substring).

 ## bitTest

-Takes any integer and converts it into [binary form](https://en.wikipedia.org/wiki/Binary_number), returns the value of a bit at specified position. The countdown starts from 0 from the right to the left.
+Takes any integer and converts it into [binary form](https://en.wikipedia.org/wiki/Binary_number), returns the value of a bit at specified position. Counting is right-to-left, starting at 0.

 **Syntax**

@@ -226,7 +226,7 @@ Result:

 ## bitTestAll

-Returns result of [logical conjunction](https://en.wikipedia.org/wiki/Logical_conjunction) (AND operator) of all bits at given positions. The countdown starts from 0 from the right to the left.
+Returns result of [logical conjunction](https://en.wikipedia.org/wiki/Logical_conjunction) (AND operator) of all bits at given positions. Counting is right-to-left, starting at 0.

 The conjunction for bit-wise operations:

@@ -289,7 +289,7 @@ Result:

 ## bitTestAny

-Returns result of [logical disjunction](https://en.wikipedia.org/wiki/Logical_disjunction) (OR operator) of all bits at given positions. The countdown starts from 0 from the right to the left.
+Returns result of [logical disjunction](https://en.wikipedia.org/wiki/Logical_disjunction) (OR operator) of all bits at given positions. Counting is right-to-left, starting at 0.
The disjunction for bit-wise operations: diff --git a/src/Functions/FunctionBitTestMany.h b/src/Functions/FunctionBitTestMany.h index 19ece2ae9e5..950e4ab4ea8 100644 --- a/src/Functions/FunctionBitTestMany.h +++ b/src/Functions/FunctionBitTestMany.h @@ -16,8 +16,8 @@ namespace ErrorCodes { extern const int ILLEGAL_COLUMN; extern const int ILLEGAL_TYPE_OF_ARGUMENT; - extern const int TOO_FEW_ARGUMENTS_FOR_FUNCTION; extern const int PARAMETER_OUT_OF_BOUND; + extern const int TOO_FEW_ARGUMENTS_FOR_FUNCTION; } diff --git a/src/Functions/bitTest.cpp b/src/Functions/bitTest.cpp index 1223ef7cbbb..cb6b83c1cf1 100644 --- a/src/Functions/bitTest.cpp +++ b/src/Functions/bitTest.cpp @@ -28,10 +28,10 @@ struct BitTestImpl throw Exception(ErrorCodes::NOT_IMPLEMENTED, "bitTest is not implemented for big integers as second argument"); else { - typename NumberTraits::ToInteger::Type a_int(a); - typename NumberTraits::ToInteger::Type b_int(b); - const auto max_position = decltype(b)((8 * sizeof(a)) - 1); - if (b > max_position || b < 0) + typename NumberTraits::ToInteger::Type a_int = a; + typename NumberTraits::ToInteger::Type b_int = b; + const auto max_position = static_cast((8 * sizeof(a)) - 1); + if (b_int > max_position || b_int < 0) throw Exception(ErrorCodes::PARAMETER_OUT_OF_BOUND, "The bit position argument needs to a positive value and less or equal to {} for integer {}", std::to_string(max_position), std::to_string(a_int)); diff --git a/tests/queries/0_stateless/01082_bit_test_out_of_bound.reference b/tests/queries/0_stateless/01082_bit_test_out_of_bound.reference index cf12c6b0b1c..26085389381 100644 --- a/tests/queries/0_stateless/01082_bit_test_out_of_bound.reference +++ b/tests/queries/0_stateless/01082_bit_test_out_of_bound.reference @@ -1,3 +1,4 @@ +-- bitTestAny 0 1 1 0 2 1 @@ -6,6 +7,7 @@ 5 0 6 1 7 0 +-- bitTestAll 0 1 1 0 2 1 @@ -14,6 +16,7 @@ 5 0 6 1 7 0 +-- bitTest 0 1 1 0 2 1 diff --git a/tests/queries/0_stateless/01082_bit_test_out_of_bound.sql b/tests/queries/0_stateless/01082_bit_test_out_of_bound.sql index 92ece2a4aa4..e741cb249d0 100644 --- a/tests/queries/0_stateless/01082_bit_test_out_of_bound.sql +++ b/tests/queries/0_stateless/01082_bit_test_out_of_bound.sql @@ -1,8 +1,12 @@ +SELECT '-- bitTestAny'; SELECT number, bitTestAny(toUInt8(1 + 4 + 16 + 64), number) FROM numbers(8); SELECT number, bitTestAny(toUInt8(1 + 4 + 16 + 64), number) FROM numbers(8, 16); -- { serverError PARAMETER_OUT_OF_BOUND } + +SELECT '-- bitTestAll'; SELECT number, bitTestAll(toUInt8(1 + 4 + 16 + 64), number) FROM numbers(8); SELECT number, bitTestAll(toUInt8(1 + 4 + 16 + 64), number) FROM numbers(8, 16); -- { serverError PARAMETER_OUT_OF_BOUND } +SELECT '-- bitTest'; SELECT number, bitTest(toUInt8(1 + 4 + 16 + 64), number) FROM numbers(8); SELECT number, bitTest(toUInt8(1 + 4 + 16 + 64), number) FROM numbers(8, 16); -- { serverError PARAMETER_OUT_OF_BOUND } SELECT number, bitTest(toUInt16(1 + 4 + 16 + 64 + 256 + 1024 + 4096 + 16384 + 65536), number) FROM numbers(16); From aaffa64cdd34da31009c206a30307e7b5db91155 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Tue, 2 Jul 2024 10:30:45 +0200 Subject: [PATCH 092/140] Fix data race for Keeper snapshot queue --- src/Common/ConcurrentBoundedQueue.h | 8 +------- src/Coordination/KeeperDispatcher.cpp | 10 ++-------- src/Coordination/KeeperStateMachine.cpp | 8 +++++--- 3 files changed, 8 insertions(+), 18 deletions(-) diff --git a/src/Common/ConcurrentBoundedQueue.h b/src/Common/ConcurrentBoundedQueue.h index 922607da813..16b9488c98d 100644 --- 
a/src/Common/ConcurrentBoundedQueue.h +++ b/src/Common/ConcurrentBoundedQueue.h @@ -1,8 +1,6 @@ #pragma once #include -#include -#include #include #include #include @@ -200,22 +198,18 @@ public: */ bool finish() { - bool was_finished_before = false; - { std::lock_guard lock(queue_mutex); if (is_finished) return true; - was_finished_before = is_finished; is_finished = true; } pop_condition.notify_all(); push_condition.notify_all(); - - return was_finished_before; + return false; } /// Returns if queue is finished diff --git a/src/Coordination/KeeperDispatcher.cpp b/src/Coordination/KeeperDispatcher.cpp index b4389da082d..38893242a2b 100644 --- a/src/Coordination/KeeperDispatcher.cpp +++ b/src/Coordination/KeeperDispatcher.cpp @@ -319,19 +319,13 @@ void KeeperDispatcher::snapshotThread() { setThreadName("KeeperSnpT"); const auto & shutdown_called = keeper_context->isShutdownCalled(); - while (!shutdown_called) + CreateSnapshotTask task; + while (snapshots_queue.pop(task)) { - CreateSnapshotTask task; - if (!snapshots_queue.pop(task)) - break; - try { auto snapshot_file_info = task.create_snapshot(std::move(task.snapshot), /*execute_only_cleanup=*/shutdown_called); - if (shutdown_called) - break; - if (!snapshot_file_info) continue; diff --git a/src/Coordination/KeeperStateMachine.cpp b/src/Coordination/KeeperStateMachine.cpp index e4d661dfe17..df152bbe0af 100644 --- a/src/Coordination/KeeperStateMachine.cpp +++ b/src/Coordination/KeeperStateMachine.cpp @@ -569,7 +569,7 @@ void KeeperStateMachine::create_snapshot(nuraft::snapshot & s, nuraft::async_res snapshot_task.create_snapshot = [this, when_done](KeeperStorageSnapshotPtr && snapshot, bool execute_only_cleanup) { nuraft::ptr exception(nullptr); - bool ret = true; + bool ret = false; if (!execute_only_cleanup) { try @@ -599,7 +599,8 @@ void KeeperStateMachine::create_snapshot(nuraft::snapshot & s, nuraft::async_res else { auto snapshot_buf = snapshot_manager.serializeSnapshotToBuffer(*snapshot); - auto snapshot_info = snapshot_manager.serializeSnapshotBufferToDisk(*snapshot_buf, snapshot->snapshot_meta->get_last_log_idx()); + auto snapshot_info = snapshot_manager.serializeSnapshotBufferToDisk( + *snapshot_buf, snapshot->snapshot_meta->get_last_log_idx()); latest_snapshot_info = std::move(snapshot_info); latest_snapshot_buf = std::move(snapshot_buf); } @@ -612,13 +613,14 @@ void KeeperStateMachine::create_snapshot(nuraft::snapshot & s, nuraft::async_res latest_snapshot_info->path); } } + + ret = true; } catch (...) 
{ ProfileEvents::increment(ProfileEvents::KeeperSnapshotCreationsFailed); LOG_TRACE(log, "Exception happened during snapshot"); tryLogCurrentException(log); - ret = false; } } { From 9e586f0871f67d612cb8c10ec631c4d3ca810237 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Tue, 2 Jul 2024 10:50:21 +0200 Subject: [PATCH 093/140] Fixes --- src/Coordination/CoordinationSettings.h | 2 +- src/Coordination/KeeperStateMachine.cpp | 74 +++++++++++++------------ src/Coordination/KeeperStateMachine.h | 8 +-- src/Coordination/KeeperStorage.cpp | 4 +- 4 files changed, 46 insertions(+), 42 deletions(-) diff --git a/src/Coordination/CoordinationSettings.h b/src/Coordination/CoordinationSettings.h index e7ae1f86d2e..6e23a56ef97 100644 --- a/src/Coordination/CoordinationSettings.h +++ b/src/Coordination/CoordinationSettings.h @@ -60,7 +60,7 @@ struct Settings; M(UInt64, disk_move_retries_wait_ms, 1000, "How long to wait between retries after a failure which happened while a file was being moved between disks.", 0) \ M(UInt64, disk_move_retries_during_init, 100, "The amount of retries after a failure which happened while a file was being moved between disks during initialization.", 0) \ M(UInt64, log_slow_total_threshold_ms, 5000, "Requests for which the total latency is larger than this settings will be logged", 0) \ - M(UInt64, log_slow_cpu_threshold_ms, 5000, "Requests for which the CPU (preprocessing and processing) latency is larger than this settings will be logged", 0) \ + M(UInt64, log_slow_cpu_threshold_ms, 100, "Requests for which the CPU (preprocessing and processing) latency is larger than this settings will be logged", 0) \ M(UInt64, log_slow_connection_operation_threshold_ms, 1000, "Log message if a certain operation took too long inside a single connection", 0) DECLARE_SETTINGS_TRAITS(CoordinationSettingsTraits, LIST_OF_COORDINATION_SETTINGS) diff --git a/src/Coordination/KeeperStateMachine.cpp b/src/Coordination/KeeperStateMachine.cpp index a12d8a50ac3..88f708ab4ae 100644 --- a/src/Coordination/KeeperStateMachine.cpp +++ b/src/Coordination/KeeperStateMachine.cpp @@ -153,14 +153,20 @@ void assertDigest( } } -} - -std::unique_lock KeeperStateMachine::getStorageLock() const +struct TSA_SCOPED_LOCKABLE LockGuardWithStats final { - Stopwatch watch; - std::unique_lock lock(storage_and_responses_lock); - ProfileEvents::increment(ProfileEvents::KeeperStorageLockWaitMicroseconds, watch.elapsedMicroseconds()); - return lock; + std::unique_lock lock; + explicit LockGuardWithStats(std::mutex & mutex) TSA_ACQUIRE(mutex) + { + Stopwatch watch; + std::unique_lock l(mutex); + ProfileEvents::increment(ProfileEvents::KeeperStorageLockWaitMicroseconds, watch.elapsedMicroseconds()); + lock = std::move(l); + } + + ~LockGuardWithStats() TSA_RELEASE() = default; +}; + } nuraft::ptr KeeperStateMachine::pre_commit(uint64_t log_idx, nuraft::buffer & data) @@ -282,7 +288,7 @@ bool KeeperStateMachine::preprocess(const KeeperStorage::RequestForSession & req if (op_num == Coordination::OpNum::SessionID || op_num == Coordination::OpNum::Reconfig) return true; - auto lock = getStorageLock(); + LockGuardWithStats lock(storage_and_responses_lock); if (storage->isFinalized()) return false; @@ -312,7 +318,7 @@ bool KeeperStateMachine::preprocess(const KeeperStorage::RequestForSession & req void KeeperStateMachine::reconfigure(const KeeperStorage::RequestForSession& request_for_session) { - auto lock = getStorageLock(); + LockGuardWithStats lock(storage_and_responses_lock); KeeperStorage::ResponseForSession response = 
processReconfiguration(request_for_session); if (!responses_queue.push(response)) { @@ -412,7 +418,7 @@ nuraft::ptr KeeperStateMachine::commit(const uint64_t log_idx, n } using namespace std::chrono; - uint64_t elapsed = request_for_session->time - duration_cast(system_clock::now().time_since_epoch()).count(); + uint64_t elapsed = duration_cast(system_clock::now().time_since_epoch()).count() - request_for_session->time; if (elapsed > keeper_context->getCoordinationSettings()->log_slow_total_threshold_ms) { LOG_INFO( @@ -438,7 +444,7 @@ nuraft::ptr KeeperStateMachine::commit(const uint64_t log_idx, n response_for_session.session_id = -1; response_for_session.response = response; - auto lock = getStorageLock(); + LockGuardWithStats lock(storage_and_responses_lock); session_id = storage->getSessionID(session_id_request.session_timeout_ms); LOG_DEBUG(log, "Session ID response {} with timeout {}", session_id, session_id_request.session_timeout_ms); response->session_id = session_id; @@ -453,7 +459,7 @@ nuraft::ptr KeeperStateMachine::commit(const uint64_t log_idx, n parsed_request_cache.erase(request_for_session->session_id); } - auto lock = getStorageLock(); + LockGuardWithStats lock(storage_and_responses_lock); KeeperStorage::ResponsesForSessions responses_for_sessions = storage->processRequest(request_for_session->request, request_for_session->session_id, request_for_session->zxid); for (auto & response_for_session : responses_for_sessions) @@ -504,7 +510,7 @@ bool KeeperStateMachine::apply_snapshot(nuraft::snapshot & s) } { /// deserialize and apply snapshot to storage - auto lock = getStorageLock(); + LockGuardWithStats lock(storage_and_responses_lock); SnapshotDeserializationResult snapshot_deserialization_result; if (latest_snapshot_ptr) @@ -556,7 +562,7 @@ void KeeperStateMachine::rollbackRequest(const KeeperStorage::RequestForSession if (request_for_session.request->getOpNum() == Coordination::OpNum::SessionID) return; - auto lock = getStorageLock(); + LockGuardWithStats lock(storage_and_responses_lock); storage->rollbackRequest(request_for_session.zxid, allow_missing); } @@ -583,7 +589,7 @@ void KeeperStateMachine::create_snapshot(nuraft::snapshot & s, nuraft::async_res auto snapshot_meta_copy = nuraft::snapshot::deserialize(*snp_buf); CreateSnapshotTask snapshot_task; { /// lock storage for a short period time to turn on "snapshot mode". After that we can read consistent storage state without locking. 
- auto lock = getStorageLock(); + LockGuardWithStats lock(storage_and_responses_lock); snapshot_task.snapshot = std::make_shared(storage.get(), snapshot_meta_copy, getClusterConfig()); } @@ -645,7 +651,7 @@ void KeeperStateMachine::create_snapshot(nuraft::snapshot & s, nuraft::async_res } { /// Destroy snapshot with lock - auto lock = getStorageLock(); + LockGuardWithStats lock(storage_and_responses_lock); LOG_TRACE(log, "Clearing garbage after snapshot"); /// Turn off "snapshot mode" and clear outdate part of storage state storage->clearGarbageAfterSnapshot(); @@ -786,7 +792,7 @@ int KeeperStateMachine::read_logical_snp_obj( void KeeperStateMachine::processReadRequest(const KeeperStorage::RequestForSession & request_for_session) { /// Pure local request, just process it with storage - auto lock = getStorageLock(); + LockGuardWithStats lock(storage_and_responses_lock); auto responses = storage->processRequest( request_for_session.request, request_for_session.session_id, std::nullopt, true /*check_acl*/, true /*is_local*/); for (const auto & response : responses) @@ -796,97 +802,97 @@ void KeeperStateMachine::processReadRequest(const KeeperStorage::RequestForSessi void KeeperStateMachine::shutdownStorage() { - auto lock = getStorageLock(); + LockGuardWithStats lock(storage_and_responses_lock); storage->finalize(); } std::vector KeeperStateMachine::getDeadSessions() { - auto lock = getStorageLock(); + LockGuardWithStats lock(storage_and_responses_lock); return storage->getDeadSessions(); } int64_t KeeperStateMachine::getNextZxid() const { - auto lock = getStorageLock(); + LockGuardWithStats lock(storage_and_responses_lock); return storage->getNextZXID(); } KeeperStorage::Digest KeeperStateMachine::getNodesDigest() const { - auto lock = getStorageLock(); + LockGuardWithStats lock(storage_and_responses_lock); return storage->getNodesDigest(false); } uint64_t KeeperStateMachine::getLastProcessedZxid() const { - auto lock = getStorageLock(); + LockGuardWithStats lock(storage_and_responses_lock); return storage->getZXID(); } uint64_t KeeperStateMachine::getNodesCount() const { - auto lock = getStorageLock(); + LockGuardWithStats lock(storage_and_responses_lock); return storage->getNodesCount(); } uint64_t KeeperStateMachine::getTotalWatchesCount() const { - auto lock = getStorageLock(); + LockGuardWithStats lock(storage_and_responses_lock); return storage->getTotalWatchesCount(); } uint64_t KeeperStateMachine::getWatchedPathsCount() const { - auto lock = getStorageLock(); + LockGuardWithStats lock(storage_and_responses_lock); return storage->getWatchedPathsCount(); } uint64_t KeeperStateMachine::getSessionsWithWatchesCount() const { - auto lock = getStorageLock(); + LockGuardWithStats lock(storage_and_responses_lock); return storage->getSessionsWithWatchesCount(); } uint64_t KeeperStateMachine::getTotalEphemeralNodesCount() const { - auto lock = getStorageLock(); + LockGuardWithStats lock(storage_and_responses_lock); return storage->getTotalEphemeralNodesCount(); } uint64_t KeeperStateMachine::getSessionWithEphemeralNodesCount() const { - auto lock = getStorageLock(); + LockGuardWithStats lock(storage_and_responses_lock); return storage->getSessionWithEphemeralNodesCount(); } void KeeperStateMachine::dumpWatches(WriteBufferFromOwnString & buf) const { - auto lock = getStorageLock(); + LockGuardWithStats lock(storage_and_responses_lock); storage->dumpWatches(buf); } void KeeperStateMachine::dumpWatchesByPath(WriteBufferFromOwnString & buf) const { - auto lock = getStorageLock(); + 
LockGuardWithStats lock(storage_and_responses_lock); storage->dumpWatchesByPath(buf); } void KeeperStateMachine::dumpSessionsAndEphemerals(WriteBufferFromOwnString & buf) const { - auto lock = getStorageLock(); + LockGuardWithStats lock(storage_and_responses_lock); storage->dumpSessionsAndEphemerals(buf); } uint64_t KeeperStateMachine::getApproximateDataSize() const { - auto lock = getStorageLock(); + LockGuardWithStats lock(storage_and_responses_lock); return storage->getApproximateDataSize(); } uint64_t KeeperStateMachine::getKeyArenaSize() const { - auto lock = getStorageLock(); + LockGuardWithStats lock(storage_and_responses_lock); return storage->getArenaDataSize(); } @@ -927,7 +933,7 @@ ClusterConfigPtr KeeperStateMachine::getClusterConfig() const void KeeperStateMachine::recalculateStorageStats() { - auto lock = getStorageLock(); + LockGuardWithStats lock(storage_and_responses_lock); LOG_INFO(log, "Recalculating storage stats"); storage->recalculateStats(); LOG_INFO(log, "Done recalculating storage stats"); diff --git a/src/Coordination/KeeperStateMachine.h b/src/Coordination/KeeperStateMachine.h index 5b166e11569..7ea14aa2d30 100644 --- a/src/Coordination/KeeperStateMachine.h +++ b/src/Coordination/KeeperStateMachine.h @@ -131,8 +131,6 @@ public: void reconfigure(const KeeperStorage::RequestForSession& request_for_session); private: - std::unique_lock getStorageLock() const; - CommitCallback commit_callback; /// In our state machine we always have a single snapshot which is stored /// in memory in compressed (serialized) format. @@ -141,7 +139,7 @@ private: nuraft::ptr latest_snapshot_buf = nullptr; /// Main state machine logic - KeeperStoragePtr storage; + KeeperStoragePtr storage TSA_PT_GUARDED_BY(storage_and_responses_lock); /// Save/Load and Serialize/Deserialize logic for snapshots. 
KeeperSnapshotManager snapshot_manager; @@ -184,7 +182,7 @@ private: KeeperSnapshotManagerS3 * snapshot_manager_s3; - KeeperStorage::ResponseForSession processReconfiguration( - const KeeperStorage::RequestForSession& request_for_session); + KeeperStorage::ResponseForSession processReconfiguration(const KeeperStorage::RequestForSession & request_for_session) + TSA_REQUIRES(storage_and_responses_lock); }; } diff --git a/src/Coordination/KeeperStorage.cpp b/src/Coordination/KeeperStorage.cpp index 1e53a664d1b..1542eb0d71a 100644 --- a/src/Coordination/KeeperStorage.cpp +++ b/src/Coordination/KeeperStorage.cpp @@ -2322,7 +2322,7 @@ void KeeperStorage::preprocessRequest( elapsed_ms, zk_request->toString(/*short_format=*/true)); } - ProfileEvents::increment(ProfileEvents::KeeperPreprocessElapsedMicroseconds, watch.elapsedMicroseconds()); + ProfileEvents::increment(ProfileEvents::KeeperPreprocessElapsedMicroseconds, elapsed); }); if (!initialized) @@ -2436,7 +2436,7 @@ KeeperStorage::ResponsesForSessions KeeperStorage::processRequest( elapsed_ms, zk_request->toString(/*short_format=*/true)); } - ProfileEvents::increment(ProfileEvents::KeeperProcessElapsedMicroseconds, watch.elapsedMicroseconds()); + ProfileEvents::increment(ProfileEvents::KeeperProcessElapsedMicroseconds, elapsed); }); if (!initialized) From ef6d579e7f2f74e5dd53054abae099d042113eb9 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 2 Jul 2024 10:56:03 +0200 Subject: [PATCH 094/140] Minor changes in CHANGELOG --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 19557652aee..e2eb65e2967 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -34,7 +34,7 @@ * Add `_time` virtual column to file alike storages (s3/file/hdfs/url/azureBlobStorage). [#64947](https://github.com/ClickHouse/ClickHouse/pull/64947) ([Ilya Golshtein](https://github.com/ilejn)). * Introduced new functions `base64URLEncode`, `base64URLDecode` and `tryBase64URLDecode`. [#64991](https://github.com/ClickHouse/ClickHouse/pull/64991) ([Mikhail Gorshkov](https://github.com/mgorshkov)). * Add new function `editDistanceUTF8`, which calculates the [edit distance](https://en.wikipedia.org/wiki/Edit_distance) between two UTF8 strings. [#65269](https://github.com/ClickHouse/ClickHouse/pull/65269) ([LiuNeng](https://github.com/liuneng1994)). -* Add `http_response_headers` setting to support custom response headers in custom HTTP handlers. [#63562](https://github.com/ClickHouse/ClickHouse/pull/63562) ([Grigorii](https://github.com/GSokol)). +* Add `http_response_headers` configuration to support custom response headers in custom HTTP handlers. [#63562](https://github.com/ClickHouse/ClickHouse/pull/63562) ([Grigorii](https://github.com/GSokol)). * Added a new table function `loop` to support returning query results in an infinite loop. [#63452](https://github.com/ClickHouse/ClickHouse/pull/63452) ([Sariel](https://github.com/sarielwxm)). This is useful for testing. * Introduced two additional columns in the `system.query_log`: `used_privileges` and `missing_privileges`. `used_privileges` is populated with the privileges that were checked during query execution, and `missing_privileges` contains required privileges that are missing. [#64597](https://github.com/ClickHouse/ClickHouse/pull/64597) ([Alexey Katsman](https://github.com/alexkats)). 
* Added a setting `output_format_pretty_display_footer_column_names` which when enabled displays column names at the end of the table for long tables (50 rows by default), with the threshold value for minimum number of rows controlled by `output_format_pretty_display_footer_column_names_min_rows`. [#65144](https://github.com/ClickHouse/ClickHouse/pull/65144) ([Shaun Struwig](https://github.com/Blargian)).

From dc8a8d9719dada68e3f582d7479d3055238adeae Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Tue, 2 Jul 2024 10:56:44 +0200
Subject: [PATCH 095/140] Remove unnatural punctuation from Parquet

---
 src/Processors/Formats/Impl/Parquet/ParquetRecordReader.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Processors/Formats/Impl/Parquet/ParquetRecordReader.cpp b/src/Processors/Formats/Impl/Parquet/ParquetRecordReader.cpp
index 9a15789f267..18a0db7484e 100644
--- a/src/Processors/Formats/Impl/Parquet/ParquetRecordReader.cpp
+++ b/src/Processors/Formats/Impl/Parquet/ParquetRecordReader.cpp
@@ -113,7 +113,7 @@ private:
 {
 throw Exception(
 ErrorCodes::PARQUET_EXCEPTION,
- "Unsupported logical type: {} and physical type: {} for field =={}=={}",
+ "Unsupported logical type: {} and physical type: {} for field `{}`{}",
 col_descriptor.logical_type()->ToString(), col_descriptor.physical_type(), col_descriptor.name(), msg);
 }
 };

From 2eb750f8ebcd74ebe2566e9138496a05f75f8b9c Mon Sep 17 00:00:00 2001
From: Blargian
Date: Tue, 2 Jul 2024 11:35:54 +0200
Subject: [PATCH 096/140] add missing transactionXYZ functions to docs

---
 .../functions/other-functions.md | 103 +++++++++++++++++-
 1 file changed, 98 insertions(+), 5 deletions(-)

diff --git a/docs/en/sql-reference/functions/other-functions.md b/docs/en/sql-reference/functions/other-functions.md
index 738a12143a3..4e252785715 100644
--- a/docs/en/sql-reference/functions/other-functions.md
+++ b/docs/en/sql-reference/functions/other-functions.md
@@ -3865,7 +3865,7 @@ Result:

 Returns the ID of a [transaction](https://clickhouse.com/docs/en/guides/developer/transactional#transactions-commit-and-rollback).

 :::note
-This function is part of an experimental feature set. Enable experimental transaction support by adding this setting in `config.d/transactions.xml`:
+This function is part of an experimental feature set. Enable experimental transaction support by adding this setting to your configuration:

 ```
 <clickhouse>
  <allow_experimental_transactions>1</allow_experimental_transactions>
 </clickhouse>
 ```

 For more information see the page [Transactional (ACID) support](https://clickhouse.com/docs/en/guides/developer/transactional#transactions-commit-and-rollback).
 :::

 **Syntax**

 ```sql
 transactionID()
 ```

 **Returned value**

-- TransactionID. [String](../data-types/string.md). [Tuple](../data-types/tuple.md)([UInt64](../data-types/int-uint.md), [UInt64](../data-types/int-uint.md), [UUID](../data-types/uuid.md)).
+- Returns a tuple consisting of `start_csn`, `local_tid` and `host_id`. [Tuple](../data-types/tuple.md).
+
+- `start_csn`: Global sequential number, the newest commit timestamp that was seen when this transaction began. [UInt64](../data-types/int-uint.md).
+- `local_tid`: Local sequential number that is unique for each transaction started by this host within a specific start_csn. [UInt64](../data-types/int-uint.md).
+- `host_id`: UUID of the host that has started this transaction. [UUID](../data-types/uuid.md).
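+
+The individual components of the tuple can be read with `tupleElement`, e.g. (a minimal sketch; the concrete values depend on the server):
+
+```sql
+BEGIN TRANSACTION;
+SELECT tupleElement(transactionID(), 3) AS host_id;
+ROLLBACK;
+```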
**Example**

Query:

```sql
+BEGIN TRANSACTION;
 SELECT transactionID();
+ROLLBACK;
```

Result:

```response
-┌─transactionID()──────────────────────────────┐
-│ (0,0,'00000000-0000-0000-0000-000000000000') │
-└──────────────────────────────────────────────┘
+┌─transactionID()────────────────────────────────┐
+│ (32,34,'0ee8b069-f2bb-4748-9eae-069c85b5252b') │
+└────────────────────────────────────────────────┘
```
+
+## transactionLatestSnapshot
+
+Returns the newest snapshot (Commit Sequence Number) of a [transaction](https://clickhouse.com/docs/en/guides/developer/transactional#transactions-commit-and-rollback) that is available for reading.
+
+:::note
+This function is part of an experimental feature set. Enable experimental transaction support by adding this setting to your configuration:
+
+```
+<clickhouse>
+ <allow_experimental_transactions>1</allow_experimental_transactions>
+</clickhouse>
+```
+
+For more information see the page [Transactional (ACID) support](https://clickhouse.com/docs/en/guides/developer/transactional#transactions-commit-and-rollback).
+:::
+
+**Syntax**
+
+```sql
+transactionLatestSnapshot()
+```
+
+**Returned value**
+
+- Returns the latest snapshot (CSN) of a transaction. [UInt64](../data-types/int-uint.md)
+
+**Example**
+
+Query:
+
+```sql
+BEGIN TRANSACTION;
+SELECT transactionLatestSnapshot();
+ROLLBACK;
+```
+
+Result:
+
+```response
+┌─transactionLatestSnapshot()─┐
+│                          32 │
+└─────────────────────────────┘
+```
+
+## transactionOldestSnapshot
+
+Returns the oldest snapshot (Commit Sequence Number) that is visible for some running [transaction](https://clickhouse.com/docs/en/guides/developer/transactional#transactions-commit-and-rollback).
+
+:::note
+This function is part of an experimental feature set. Enable experimental transaction support by adding this setting to your configuration:
+
+```
+<clickhouse>
+ <allow_experimental_transactions>1</allow_experimental_transactions>
+</clickhouse>
+```
+
+For more information see the page [Transactional (ACID) support](https://clickhouse.com/docs/en/guides/developer/transactional#transactions-commit-and-rollback).
+:::
+
+**Syntax**
+
+```sql
+transactionOldestSnapshot()
+```
+
+**Returned value**
+
+- Returns the oldest snapshot (CSN) of a transaction. [UInt64](../data-types/int-uint.md)
+
+**Example**
+
+Query:
+
+```sql
+BEGIN TRANSACTION;
+SELECT transactionOldestSnapshot();
+ROLLBACK;
+```
+
+Result:
+
+```response
+┌─transactionOldestSnapshot()─┐
+│                          32 │
+└─────────────────────────────┘
+```

From a905b24f7585191f2271f77cc4817e11f5b758f0 Mon Sep 17 00:00:00 2001
From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com>
Date: Tue, 2 Jul 2024 11:54:56 +0200
Subject: [PATCH 097/140] Fix clang-tidy

---
 src/Processors/Chunk.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/Processors/Chunk.cpp b/src/Processors/Chunk.cpp
index 8e3ca0b03b3..5f6cf2f7230 100644
--- a/src/Processors/Chunk.cpp
+++ b/src/Processors/Chunk.cpp
@@ -125,7 +125,7 @@ void Chunk::addColumn(size_t position, ColumnPtr column)
 if (position >= columns.size())
 throw Exception(ErrorCodes::POSITION_OUT_OF_BOUND,
 "Position {} out of bound in Chunk::addColumn(), max position = {}",
- position, columns.size() ? columns.size() - 1 : 0);
+ position, !columns.empty() ? columns.size() - 1 : 0);
 if (empty())
 num_rows = column->size();
 else if (column->size() != num_rows)
@@ -143,7 +143,7 @@ void Chunk::erase(size_t position)
 if (position >= columns.size())
 throw Exception(ErrorCodes::POSITION_OUT_OF_BOUND,
 "Position {} out of bound in Chunk::erase(), max position = {}",
- toString(position), toString(columns.size() ? columns.size() - 1 : 0));
+ toString(position), toString(!columns.empty() ?
columns.size() - 1 : 0)); columns.erase(columns.begin() + position); } From ee0c4093d461233cfb920a4a224292ea9529393b Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Tue, 2 Jul 2024 12:03:27 +0200 Subject: [PATCH 098/140] Update run.sh --- docker/test/stateless/run.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh index 3ce489b9e0e..7d6499cef5e 100755 --- a/docker/test/stateless/run.sh +++ b/docker/test/stateless/run.sh @@ -253,7 +253,7 @@ function run_tests() try_run_with_retry 10 clickhouse-client -q "insert into system.zookeeper (name, path, value) values ('auxiliary_zookeeper2', '/test/chroot/', '')" set +e - clickhouse-test --testname --shard --zookeeper --check-zookeeper-session --hung-check --print-time \ + timeout -s TERM --preserve-status 120m clickhouse-test --testname --shard --zookeeper --check-zookeeper-session --hung-check --print-time \ --no-drop-if-fail --test-runs "$NUM_TRIES" "${ADDITIONAL_OPTIONS[@]}" 2>&1 \ | ts '%Y-%m-%d %H:%M:%S' \ | tee -a test_output/test_result.txt From 422b8dea317168ddc4aa4c0e14e0d0350ce0be81 Mon Sep 17 00:00:00 2001 From: vdimir Date: Tue, 2 Jul 2024 12:30:22 +0200 Subject: [PATCH 099/140] Add database_replicated_allow_heavy_create to settings changes --- src/Core/SettingsChangesHistory.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index b0725340f46..70f94fe2ab0 100644 --- a/src/Core/SettingsChangesHistory.cpp +++ b/src/Core/SettingsChangesHistory.cpp @@ -59,6 +59,7 @@ static std::initializer_list Date: Tue, 2 Jul 2024 10:51:58 +0000 Subject: [PATCH 100/140] Fix support of non-const scale arguments in power function --- src/Functions/FunctionsRound.h | 171 ++++++++++++++---- .../03165_round_scale_as_column.reference | 13 ++ .../03165_round_scale_as_column.sql | 3 +- 3 files changed, 152 insertions(+), 35 deletions(-) diff --git a/src/Functions/FunctionsRound.h b/src/Functions/FunctionsRound.h index 08e257de8ac..d43f7f264b4 100644 --- a/src/Functions/FunctionsRound.h +++ b/src/Functions/FunctionsRound.h @@ -518,39 +518,105 @@ struct Dispatcher template static ColumnPtr apply(const IColumn * value_col, const IColumn * scale_col = nullptr) { - const auto & value_col_typed = checkAndGetColumn>(*value_col); - auto col_res = ColumnVector::create(); - - typename ColumnVector::Container & vec_res = col_res->getData(); - vec_res.resize(value_col_typed.getData().size()); - - if (!vec_res.empty()) + // Non-const value argument: + const auto * value_col_typed = checkAndGetColumn>(value_col); + if (value_col_typed) { + auto col_res = ColumnVector::create(); + + typename ColumnVector::Container & vec_res = col_res->getData(); + vec_res.resize(value_col_typed->getData().size()); + + if (!vec_res.empty()) + { + // Const scale argument: + if (scale_col == nullptr || isColumnConst(*scale_col)) + { + auto scale_arg = (scale_col == nullptr) ? 
0 : getScaleArg(checkAndGetColumnConst>(scale_col)); + if (scale_arg == 0) + { + size_t scale = 1; + FunctionRoundingImpl::apply(value_col_typed->getData(), scale, vec_res); + } + else if (scale_arg > 0) + { + size_t scale = intExp10(scale_arg); + FunctionRoundingImpl::apply(value_col_typed->getData(), scale, vec_res); + } + else + { + size_t scale = intExp10(-scale_arg); + FunctionRoundingImpl::apply(value_col_typed->getData(), scale, vec_res); + } + } + /// Non-const scale argument: + else if (const auto * scale_col_typed = checkAndGetColumn>(scale_col)) + { + const auto & value_data = value_col_typed->getData(); + const auto & scale_data = scale_col_typed->getData(); + const size_t rows = value_data.size(); + + for (size_t i = 0; i < rows; ++i) + { + Int64 scale64 = scale_data[i]; + validateScale(scale64); + Scale raw_scale = scale64; + + if (raw_scale == 0) + { + size_t scale = 1; + FunctionRoundingImpl::applyOne(value_data[i], scale, vec_res[i]); + } + else if (raw_scale > 0) + { + size_t scale = intExp10(raw_scale); + FunctionRoundingImpl::applyOne(value_data[i], scale, vec_res[i]); + } + else + { + size_t scale = intExp10(-raw_scale); + FunctionRoundingImpl::applyOne(value_data[i], scale, vec_res[i]); + } + } + } + } + return col_res; + } + // Const value argument: + const auto * value_col_typed_const = checkAndGetColumnConst>(value_col); + if (value_col_typed_const) + { + const auto & value_data = value_col_typed_const->template getValue(); + // Const scale argument: + auto col_res = ColumnVector::create(); + typename ColumnVector::Container & vec_res = col_res->getData(); if (scale_col == nullptr || isColumnConst(*scale_col)) { + vec_res.resize(1); auto scale_arg = (scale_col == nullptr) ? 0 : getScaleArg(checkAndGetColumnConst>(scale_col)); if (scale_arg == 0) { size_t scale = 1; - FunctionRoundingImpl::apply(value_col_typed.getData(), scale, vec_res); + FunctionRoundingImpl::applyOne(value_data, scale, vec_res[0]); } else if (scale_arg > 0) { size_t scale = intExp10(scale_arg); - FunctionRoundingImpl::apply(value_col_typed.getData(), scale, vec_res); + FunctionRoundingImpl::applyOne(value_data, scale, vec_res[0]); } else { size_t scale = intExp10(-scale_arg); - FunctionRoundingImpl::apply(value_col_typed.getData(), scale, vec_res); + FunctionRoundingImpl::applyOne(value_data, scale, vec_res[0]); } } /// Non-const scale argument: else if (const auto * scale_col_typed = checkAndGetColumn>(scale_col)) { - const auto & value_data = value_col_typed.getData(); const auto & scale_data = scale_col_typed->getData(); - const size_t rows = value_data.size(); + const size_t rows = scale_data.size(); + + vec_res.resize(rows); for (size_t i = 0; i < rows; ++i) { @@ -561,23 +627,23 @@ struct Dispatcher if (raw_scale == 0) { size_t scale = 1; - FunctionRoundingImpl::applyOne(value_data[i], scale, vec_res[i]); + FunctionRoundingImpl::applyOne(value_data, scale, vec_res[i]); } else if (raw_scale > 0) { size_t scale = intExp10(raw_scale); - FunctionRoundingImpl::applyOne(value_data[i], scale, vec_res[i]); + FunctionRoundingImpl::applyOne(value_data, scale, vec_res[i]); } else { size_t scale = intExp10(-raw_scale); - FunctionRoundingImpl::applyOne(value_data[i], scale, vec_res[i]); + FunctionRoundingImpl::applyOne(value_data, scale, vec_res[i]); } } } + return col_res; } - - return col_res; + return nullptr; } }; @@ -589,24 +655,64 @@ public: template static ColumnPtr apply(const IColumn * value_col, const IColumn * scale_col = nullptr) { - const auto & value_col_typed = 
checkAndGetColumn>(*value_col); - const typename ColumnDecimal::Container & vec_src = value_col_typed.getData(); - - auto col_res = ColumnDecimal::create(vec_src.size(), value_col_typed.getScale()); - auto & vec_res = col_res->getData(); - - if (!vec_res.empty()) + // Non-const value argument: + const auto * value_col_typed = checkAndGetColumn>(value_col); + if (value_col_typed) { + const typename ColumnDecimal::Container & vec_src = value_col_typed->getData(); + + auto col_res = ColumnDecimal::create(vec_src.size(), value_col_typed->getScale()); + auto & vec_res = col_res->getData(); + vec_res.resize(vec_src.size()); + + if (!vec_res.empty()) + { + /// Const scale argument: + if (scale_col == nullptr || isColumnConst(*scale_col)) + { + auto scale_arg = scale_col == nullptr ? 0 : getScaleArg(checkAndGetColumnConst>(scale_col)); + DecimalRoundingImpl::apply(vec_src, value_col_typed->getScale(), vec_res, scale_arg); + } + /// Non-const scale argument: + else if (const auto * scale_col_typed = checkAndGetColumn>(scale_col)) + { + const auto & scale = scale_col_typed->getData(); + const size_t rows = vec_src.size(); + + for (size_t i = 0; i < rows; ++i) + { + Int64 scale64 = scale[i]; + validateScale(scale64); + Scale raw_scale = scale64; + + DecimalRoundingImpl::applyOne(value_col_typed->getElement(i), value_col_typed->getScale(), + reinterpret_cast::NativeT&>(col_res->getElement(i)), raw_scale); + } + } + } + + return col_res; + } + // Const value argument: + const auto * value_col_typed_const = checkAndGetColumnConst>(value_col); + if (value_col_typed_const) + { + auto col = assert_cast*>(value_col_typed_const->getDataColumnPtr().get()); + const auto & value_data = value_col_typed_const->template getValue(); + // Const scale argument: if (scale_col == nullptr || isColumnConst(*scale_col)) { + auto col_res = ColumnDecimal::create(1, col->getScale()); auto scale_arg = scale_col == nullptr ? 
0 : getScaleArg(checkAndGetColumnConst>(scale_col)); - DecimalRoundingImpl::apply(value_col_typed.getData(), value_col_typed.getScale(), vec_res, scale_arg); + DecimalRoundingImpl::applyOne(value_data, col->getScale(), reinterpret_cast::NativeT&>(col_res->getElement(0)), scale_arg); + return col_res; } - /// Non-const scale argument - else if (const auto * scale_col_typed = checkAndGetColumn>(scale_col)) + /// Non-const scale argument: + if (const auto * scale_col_typed = checkAndGetColumn>(scale_col)) { const auto & scale = scale_col_typed->getData(); - const size_t rows = vec_src.size(); + const size_t rows = scale.size(); + auto col_res = ColumnDecimal::create(rows, col->getScale()); for (size_t i = 0; i < rows; ++i) { @@ -614,13 +720,13 @@ public: validateScale(scale64); Scale raw_scale = scale64; - DecimalRoundingImpl::applyOne(value_col_typed.getElement(i), value_col_typed.getScale(), + DecimalRoundingImpl::applyOne(value_data, col->getScale(), reinterpret_cast::NativeT&>(col_res->getElement(i)), raw_scale); } + return col_res; } } - - return col_res; + return nullptr; } }; @@ -671,9 +777,6 @@ public: using ScaleTypes = std::decay_t; using ScaleType = typename ScaleTypes::RightType; - if (isColumnConst(*value_arg.column) && !isColumnConst(*scale_column.column)) - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Scale column must be const for const data column"); - res = Dispatcher::template apply(value_arg.column.get(), scale_column.column.get()); return true; }; diff --git a/tests/queries/0_stateless/03165_round_scale_as_column.reference b/tests/queries/0_stateless/03165_round_scale_as_column.reference index 9ad25ed466a..e0c9b6959ee 100644 --- a/tests/queries/0_stateless/03165_round_scale_as_column.reference +++ b/tests/queries/0_stateless/03165_round_scale_as_column.reference @@ -2162,4 +2162,17 @@ CHECKPOINT2 10 1.6275 1.6275 1.6275 1.6275 1 1 +3 +3.1 +3.14 +3.142 +3.1416 +3.14159 +3.141593 +3.1415927 +3.14159265 +3.141592654 +42 +42.4 +42.42 1 diff --git a/tests/queries/0_stateless/03165_round_scale_as_column.sql b/tests/queries/0_stateless/03165_round_scale_as_column.sql index 229f705808d..adae36564b8 100644 --- a/tests/queries/0_stateless/03165_round_scale_as_column.sql +++ b/tests/queries/0_stateless/03165_round_scale_as_column.sql @@ -118,6 +118,7 @@ DROP TABLE tab; SELECT round(1, 1); SELECT round(materialize(1), materialize(1)); -SELECT round(1, materialize(1)); --{serverError ILLEGAL_COLUMN} +SELECT round(pi(), number) FROM numbers(10); +SELECT round(toDecimal32(42.42, 2), number) from numbers(3); SELECT round(materialize(1), 1); SELECT materialize(10.1) AS x, ceil(x, toUInt256(123)); --{serverError ILLEGAL_TYPE_OF_ARGUMENT} From 2ce564daa0a7eb98b46dbed50d2eddfff8a731c2 Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 2 Jul 2024 13:05:17 +0200 Subject: [PATCH 101/140] Make 01006_simpod_empty_part_single_column_write.sh always use vertical merge --- .../0_stateless/01006_simpod_empty_part_single_column_write.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01006_simpod_empty_part_single_column_write.sh b/tests/queries/0_stateless/01006_simpod_empty_part_single_column_write.sh index 16ebf2e6e54..c3ad29d33a1 100755 --- a/tests/queries/0_stateless/01006_simpod_empty_part_single_column_write.sh +++ b/tests/queries/0_stateless/01006_simpod_empty_part_single_column_write.sh @@ -18,7 +18,7 @@ ${CLICKHOUSE_CLIENT} --query="CREATE TABLE table_with_empty_part ENGINE = MergeTree() ORDER BY id PARTITION BY id -SETTINGS 
vertical_merge_algorithm_min_rows_to_activate=0, vertical_merge_algorithm_min_columns_to_activate=0, remove_empty_parts = 0 +SETTINGS vertical_merge_algorithm_min_rows_to_activate=0, vertical_merge_algorithm_min_columns_to_activate=0, remove_empty_parts = 0, min_bytes_for_wide_part=0, min_bytes_for_full_part_storage = 0 " From f2c06becd5fb64d1075a7a327d990f44eeb12b2d Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 2 Jul 2024 13:17:28 +0200 Subject: [PATCH 102/140] Fix race in s3queue --- src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.cpp b/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.cpp index 955e49bc2bf..1939ea0a66f 100644 --- a/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.cpp +++ b/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.cpp @@ -111,10 +111,12 @@ void ObjectStorageQueueSource::FileIterator::returnForRetry(Source::ObjectInfoPt if (metadata->useBucketsForProcessing()) { const auto bucket = metadata->getBucketForPath(object_info->relative_path); + std::lock_guard lock(mutex); listed_keys_cache[bucket].keys.emplace_front(object_info); } else { + std::lock_guard lock(mutex); objects_to_retry.push_back(object_info); } } From 94dba17ad92d6982e5e681113e97eb39099ff696 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Tue, 2 Jul 2024 13:26:52 +0200 Subject: [PATCH 103/140] Fix test --- tests/integration/test_keeper_four_word_command/test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_keeper_four_word_command/test.py b/tests/integration/test_keeper_four_word_command/test.py index a3a059c1dcb..83503122729 100644 --- a/tests/integration/test_keeper_four_word_command/test.py +++ b/tests/integration/test_keeper_four_word_command/test.py @@ -301,7 +301,7 @@ def test_cmd_conf(started_cluster): assert result["disk_move_retries_during_init"] == "100" assert result["log_slow_total_threshold_ms"] == "5000" - assert result["log_slow_cpu_threshold_ms"] == "5000" + assert result["log_slow_cpu_threshold_ms"] == "100" assert result["log_slow_connection_operation_threshold_ms"] == "1000" finally: close_keeper_socket(client) From 70a2061c9bccd85f6c939529609051c06042c563 Mon Sep 17 00:00:00 2001 From: divanik Date: Tue, 2 Jul 2024 12:27:01 +0000 Subject: [PATCH 104/140] Fixed bug and added test --- src/Disks/ObjectStorages/IObjectStorage.h | 1 + .../StorageObjectStorageSource.cpp | 41 ++++--- .../StorageObjectStorageSource.h | 6 +- .../03036_reading_s3_archives.reference | 104 +++++++++--------- .../0_stateless/03036_reading_s3_archives.sql | 30 ++--- 5 files changed, 96 insertions(+), 86 deletions(-) diff --git a/src/Disks/ObjectStorages/IObjectStorage.h b/src/Disks/ObjectStorages/IObjectStorage.h index 9f5c14fdb7c..6410a9a7a73 100644 --- a/src/Disks/ObjectStorages/IObjectStorage.h +++ b/src/Disks/ObjectStorages/IObjectStorage.h @@ -75,6 +75,7 @@ struct RelativePathWithMetadata virtual std::string getPath() const { return relative_path; } virtual bool isArchive() const { return false; } virtual std::string getPathToArchive() const { throw Exception(ErrorCodes::LOGICAL_ERROR, "Not an archive"); } + virtual size_t fileSizeInArchive() const { throw Exception(ErrorCodes::LOGICAL_ERROR, "Not an archive"); } }; struct ObjectKeyWithMetadata diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp index aef783fc3c4..9436e729683 100644 --- 
a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp @@ -196,13 +196,12 @@ Chunk StorageObjectStorageSource::generate() const auto & filename = object_info->getFileName(); chassert(object_info->metadata); VirtualColumnUtils::addRequestedFileLikeStorageVirtualsToChunk( - chunk, read_from_format_info.requested_virtual_columns, - { - .path = getUniqueStoragePathIdentifier(*configuration, *object_info, false), - .size = object_info->metadata->size_bytes, - .filename = &filename, - .last_modified = object_info->metadata->last_modified - }); + chunk, + read_from_format_info.requested_virtual_columns, + {.path = getUniqueStoragePathIdentifier(*configuration, *object_info, false), + .size = object_info->isArchive() ? object_info->fileSizeInArchive() : object_info->metadata->size_bytes, + .filename = &filename, + .last_modified = object_info->metadata->last_modified}); return chunk; } @@ -690,10 +689,9 @@ static IArchiveReader::NameFilter createArchivePathFilter(const std::string & ar StorageObjectStorageSource::ArchiveIterator::ObjectInfoInArchive::ObjectInfoInArchive( ObjectInfoPtr archive_object_, const std::string & path_in_archive_, - std::shared_ptr archive_reader_) - : archive_object(archive_object_) - , path_in_archive(path_in_archive_) - , archive_reader(archive_reader_) + std::shared_ptr archive_reader_, + IArchiveReader::FileInfo && file_info_) + : archive_object(archive_object_), path_in_archive(path_in_archive_), archive_reader(archive_reader_), file_info(file_info_) { } @@ -732,6 +730,7 @@ StorageObjectStorageSource::ObjectInfoPtr StorageObjectStorageSource::ArchiveIterator::nextImpl(size_t processor) { std::unique_lock lock{next_mutex}; + IArchiveReader::FileInfo current_file_info{}; while (true) { if (filter) @@ -756,6 +755,8 @@ StorageObjectStorageSource::ArchiveIterator::nextImpl(size_t processor) path_in_archive = file_enumerator->getFileName(); if (!filter(path_in_archive)) continue; + else + current_file_info = file_enumerator->getFileInfo(); } else { @@ -769,15 +770,19 @@ StorageObjectStorageSource::ArchiveIterator::nextImpl(size_t processor) archive_reader = createArchiveReader(archive_object); if (!archive_reader->fileExists(path_in_archive)) continue; + else + current_file_info = archive_reader->getFileInfo(path_in_archive); } - - auto object_in_archive = std::make_shared(archive_object, path_in_archive, archive_reader); - - if (read_keys != nullptr) - read_keys->push_back(object_in_archive); - - return object_in_archive; + break; } + + auto object_in_archive + = std::make_shared(archive_object, path_in_archive, archive_reader, std::move(current_file_info)); + + if (read_keys != nullptr) + read_keys->push_back(object_in_archive); + + return object_in_archive; } size_t StorageObjectStorageSource::ArchiveIterator::estimatedKeysCount() diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.h b/src/Storages/ObjectStorage/StorageObjectStorageSource.h index d93097d2636..2cbe8a9776c 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.h @@ -259,7 +259,8 @@ public: ObjectInfoInArchive( ObjectInfoPtr archive_object_, const std::string & path_in_archive_, - std::shared_ptr archive_reader_); + std::shared_ptr archive_reader_, + IArchiveReader::FileInfo && file_info_); std::string getFileName() const override { @@ -278,9 +279,12 @@ public: bool isArchive() const override { return true; } + size_t fileSizeInArchive() const 
override { return file_info.uncompressed_size; } + const ObjectInfoPtr archive_object; const std::string path_in_archive; const std::shared_ptr archive_reader; + const IArchiveReader::FileInfo file_info; }; private: diff --git a/tests/queries/0_stateless/03036_reading_s3_archives.reference b/tests/queries/0_stateless/03036_reading_s3_archives.reference index 36ced212a1b..eacf16d0295 100644 --- a/tests/queries/0_stateless/03036_reading_s3_archives.reference +++ b/tests/queries/0_stateless/03036_reading_s3_archives.reference @@ -1,52 +1,52 @@ -1 Str1 example1.csv test/03036_archive1.zip::example1.csv -2 Str2 example1.csv test/03036_archive1.zip::example1.csv -3 Str3 example2.csv test/03036_archive2.zip::example2.csv -4 Str4 example2.csv test/03036_archive2.zip::example2.csv -5 Str5 example3.csv test/03036_archive2.zip::example3.csv -6 Str6 example3.csv test/03036_archive2.zip::example3.csv -3 Str3 example2.csv test/03036_archive1.zip::example2.csv -3 Str3 example2.csv test/03036_archive2.zip::example2.csv -4 Str4 example2.csv test/03036_archive1.zip::example2.csv -4 Str4 example2.csv test/03036_archive2.zip::example2.csv -1 Str1 example1.csv test/03036_archive1.zip::example1.csv -2 Str2 example1.csv test/03036_archive1.zip::example1.csv -3 Str3 example2.csv test/03036_archive1.zip::example2.csv -3 Str3 example2.csv test/03036_archive2.zip::example2.csv -4 Str4 example2.csv test/03036_archive1.zip::example2.csv -4 Str4 example2.csv test/03036_archive2.zip::example2.csv -5 Str5 example3.csv test/03036_archive2.zip::example3.csv -6 Str6 example3.csv test/03036_archive2.zip::example3.csv -1 Str1 example1.csv test/03036_archive1.tar::example1.csv -2 Str2 example1.csv test/03036_archive1.tar::example1.csv -7 Str7 example4.csv test/03036_archive1.tar::example4.csv -7 Str7 example4.csv test/03036_archive2.tar::example4.csv -8 Str8 example4.csv test/03036_archive1.tar::example4.csv -8 Str8 example4.csv test/03036_archive2.tar::example4.csv -5 Str5 example3.csv test/03036_archive2.tar::example3.csv -6 Str6 example3.csv test/03036_archive2.tar::example3.csv -7 Str7 example4.csv test/03036_archive2.tar::example4.csv -8 Str8 example4.csv test/03036_archive2.tar::example4.csv -9 Str9 example5.csv test/03036_archive2.tar::example5.csv -10 Str10 example5.csv test/03036_archive2.tar::example5.csv -3 Str3 example2.csv test/03036_archive3.tar.gz::example2.csv -4 Str4 example2.csv test/03036_archive3.tar.gz::example2.csv -11 Str11 example6.csv test/03036_archive3.tar.gz::example6.csv -12 Str12 example6.csv test/03036_archive3.tar.gz::example6.csv -3 Str3 example2.csv test/03036_archive3.tar.gz::example2.csv -4 Str4 example2.csv test/03036_archive3.tar.gz::example2.csv -5 Str5 example3.csv test/03036_archive2.tar::example3.csv -6 Str6 example3.csv test/03036_archive2.tar::example3.csv -3 Str3 example2.csv test/03036_archive2.zip::example2.csv -4 Str4 example2.csv test/03036_archive2.zip::example2.csv -5 Str5 example3.csv test/03036_archive2.tar::example3.csv -6 Str6 example3.csv test/03036_archive2.tar::example3.csv -7 Str7 example4.csv test/03036_archive2.tar::example4.csv -8 Str8 example4.csv test/03036_archive2.tar::example4.csv -9 Str9 example5.csv test/03036_archive2.tar::example5.csv -10 Str10 example5.csv test/03036_archive2.tar::example5.csv -3 Str3 example2.csv test/03036_archive3.tar.gz::example2.csv -4 Str4 example2.csv test/03036_archive3.tar.gz::example2.csv -5 Str5 example3.csv test/03036_archive2.tar::example3.csv -6 Str6 example3.csv test/03036_archive2.tar::example3.csv -13 Str13 example7.csv 
test/03036_compressed_file_archive.zip::example7.csv -14 Str14 example7.csv test/03036_compressed_file_archive.zip::example7.csv +1 Str1 25 example1.csv test/03036_archive1.zip::example1.csv +2 Str2 25 example1.csv test/03036_archive1.zip::example1.csv +3 Str3 25 example2.csv test/03036_archive2.zip::example2.csv +4 Str4 25 example2.csv test/03036_archive2.zip::example2.csv +5 Str5 25 example3.csv test/03036_archive2.zip::example3.csv +6 Str6 25 example3.csv test/03036_archive2.zip::example3.csv +3 Str3 25 example2.csv test/03036_archive1.zip::example2.csv +3 Str3 25 example2.csv test/03036_archive2.zip::example2.csv +4 Str4 25 example2.csv test/03036_archive1.zip::example2.csv +4 Str4 25 example2.csv test/03036_archive2.zip::example2.csv +1 Str1 25 example1.csv test/03036_archive1.zip::example1.csv +2 Str2 25 example1.csv test/03036_archive1.zip::example1.csv +3 Str3 25 example2.csv test/03036_archive1.zip::example2.csv +3 Str3 25 example2.csv test/03036_archive2.zip::example2.csv +4 Str4 25 example2.csv test/03036_archive1.zip::example2.csv +4 Str4 25 example2.csv test/03036_archive2.zip::example2.csv +5 Str5 25 example3.csv test/03036_archive2.zip::example3.csv +6 Str6 25 example3.csv test/03036_archive2.zip::example3.csv +1 Str1 25 example1.csv test/03036_archive1.tar::example1.csv +2 Str2 25 example1.csv test/03036_archive1.tar::example1.csv +7 Str7 25 example4.csv test/03036_archive1.tar::example4.csv +7 Str7 25 example4.csv test/03036_archive2.tar::example4.csv +8 Str8 25 example4.csv test/03036_archive1.tar::example4.csv +8 Str8 25 example4.csv test/03036_archive2.tar::example4.csv +5 Str5 25 example3.csv test/03036_archive2.tar::example3.csv +6 Str6 25 example3.csv test/03036_archive2.tar::example3.csv +7 Str7 25 example4.csv test/03036_archive2.tar::example4.csv +8 Str8 25 example4.csv test/03036_archive2.tar::example4.csv +9 Str9 27 example5.csv test/03036_archive2.tar::example5.csv +10 Str10 27 example5.csv test/03036_archive2.tar::example5.csv +3 Str3 25 example2.csv test/03036_archive3.tar.gz::example2.csv +4 Str4 25 example2.csv test/03036_archive3.tar.gz::example2.csv +11 Str11 29 example6.csv test/03036_archive3.tar.gz::example6.csv +12 Str12 29 example6.csv test/03036_archive3.tar.gz::example6.csv +3 Str3 25 example2.csv test/03036_archive3.tar.gz::example2.csv +4 Str4 25 example2.csv test/03036_archive3.tar.gz::example2.csv +5 Str5 25 example3.csv test/03036_archive2.tar::example3.csv +6 Str6 25 example3.csv test/03036_archive2.tar::example3.csv +3 Str3 25 example2.csv test/03036_archive2.zip::example2.csv +4 Str4 25 example2.csv test/03036_archive2.zip::example2.csv +5 Str5 25 example3.csv test/03036_archive2.tar::example3.csv +6 Str6 25 example3.csv test/03036_archive2.tar::example3.csv +7 Str7 25 example4.csv test/03036_archive2.tar::example4.csv +8 Str8 25 example4.csv test/03036_archive2.tar::example4.csv +9 Str9 27 example5.csv test/03036_archive2.tar::example5.csv +10 Str10 27 example5.csv test/03036_archive2.tar::example5.csv +3 Str3 25 example2.csv test/03036_archive3.tar.gz::example2.csv +4 Str4 25 example2.csv test/03036_archive3.tar.gz::example2.csv +5 Str5 25 example3.csv test/03036_archive2.tar::example3.csv +6 Str6 25 example3.csv test/03036_archive2.tar::example3.csv +13 Str13 57 example7.csv test/03036_compressed_file_archive.zip::example7.csv +14 Str14 57 example7.csv test/03036_compressed_file_archive.zip::example7.csv diff --git a/tests/queries/0_stateless/03036_reading_s3_archives.sql b/tests/queries/0_stateless/03036_reading_s3_archives.sql index 
00d7cc25e1a..43bda4ee704 100644 --- a/tests/queries/0_stateless/03036_reading_s3_archives.sql +++ b/tests/queries/0_stateless/03036_reading_s3_archives.sql @@ -1,22 +1,22 @@ -- Tags: no-fasttest -- Tag no-fasttest: Depends on AWS -SELECT id, data, _file, _path FROM s3(s3_conn, filename='03036_archive1.zip :: example1.csv') ORDER BY (id, _file, _path); -SELECT id, data, _file, _path FROM s3(s3_conn, filename='03036_archive2.zip :: example*.csv') ORDER BY (id, _file, _path); -SELECT id, data, _file, _path FROM s3(s3_conn, filename='03036_archive*.zip :: example2.csv') ORDER BY (id, _file, _path); -SELECT id, data, _file, _path FROM s3(s3_conn, filename='03036_archive*.zip :: example*') ORDER BY (id, _file, _path); -SELECT id, data, _file, _path FROM s3(s3_conn, filename='03036_archive1.tar :: example1.csv') ORDER BY (id, _file, _path); -SELECT id, data, _file, _path FROM s3(s3_conn, filename='03036_archive*.tar :: example4.csv') ORDER BY (id, _file, _path); -SELECT id, data, _file, _path FROM s3(s3_conn, filename='03036_archive2.tar :: example*.csv') ORDER BY (id, _file, _path); -SELECT id, data, _file, _path FROM s3(s3_conn, filename='03036_archive*.tar.gz :: example*.csv') ORDER BY (id, _file, _path); -SELECT id, data, _file, _path FROM s3(s3_conn, filename='03036_archive*.tar* :: example{2..3}.csv') ORDER BY (id, _file, _path); -select id, data, _file, _path from s3(s3_conn, filename='03036_archive2.zip :: nonexistent.csv'); -- { serverError CANNOT_EXTRACT_TABLE_STRUCTURE } -select id, data, _file, _path from s3(s3_conn, filename='03036_archive2.zip :: nonexistent{2..3}.csv'); -- { serverError CANNOT_EXTRACT_TABLE_STRUCTURE } +SELECT id, data, _size, _file, _path FROM s3(s3_conn, filename='03036_archive1.zip :: example1.csv') ORDER BY (id, _file, _path); +SELECT id, data, _size, _file, _path FROM s3(s3_conn, filename='03036_archive2.zip :: example*.csv') ORDER BY (id, _file, _path); +SELECT id, data, _size, _file, _path FROM s3(s3_conn, filename='03036_archive*.zip :: example2.csv') ORDER BY (id, _file, _path); +SELECT id, data, _size, _file, _path FROM s3(s3_conn, filename='03036_archive*.zip :: example*') ORDER BY (id, _file, _path); +SELECT id, data, _size, _file, _path FROM s3(s3_conn, filename='03036_archive1.tar :: example1.csv') ORDER BY (id, _file, _path); +SELECT id, data, _size, _file, _path FROM s3(s3_conn, filename='03036_archive*.tar :: example4.csv') ORDER BY (id, _file, _path); +SELECT id, data, _size, _file, _path FROM s3(s3_conn, filename='03036_archive2.tar :: example*.csv') ORDER BY (id, _file, _path); +SELECT id, data, _size, _file, _path FROM s3(s3_conn, filename='03036_archive*.tar.gz :: example*.csv') ORDER BY (id, _file, _path); +SELECT id, data, _size, _file, _path FROM s3(s3_conn, filename='03036_archive*.tar* :: example{2..3}.csv') ORDER BY (id, _file, _path); +select id, data, _size, _file, _path from s3(s3_conn, filename='03036_archive2.zip :: nonexistent.csv'); -- { serverError CANNOT_EXTRACT_TABLE_STRUCTURE } +select id, data, _size, _file, _path from s3(s3_conn, filename='03036_archive2.zip :: nonexistent{2..3}.csv'); -- { serverError CANNOT_EXTRACT_TABLE_STRUCTURE } CREATE TABLE table_zip22 Engine S3(s3_conn, filename='03036_archive2.zip :: example2.csv'); -select id, data, _file, _path from table_zip22 ORDER BY (id, _file, _path); +select id, data, _size, _file, _path from table_zip22 ORDER BY (id, _file, _path); CREATE table table_tar2star Engine S3(s3_conn, filename='03036_archive2.tar :: example*.csv'); -SELECT id, data, _file, _path FROM 
table_tar2star ORDER BY (id, _file, _path); CREATE table table_tarstarglobs Engine S3(s3_conn, filename='03036_archive*.tar* :: example{2..3}.csv'); -SELECT id, data, _file, _path FROM table_tarstarglobs ORDER BY (id, _file, _path); +SELECT id, data, _size, _file, _path FROM table_tarstarglobs ORDER BY (id, _file, _path); CREATE table table_noexist Engine s3(s3_conn, filename='03036_archive2.zip :: nonexistent.csv'); -- { serverError UNKNOWN_STORAGE } -SELECT id, data, _file, _path FROM s3(s3_conn, filename='03036_compressed_file_archive.zip :: example7.csv', format='CSV', structure='auto', compression_method='gz') ORDER BY (id, _file, _path) +SELECT id, data, _size, _file, _path FROM s3(s3_conn, filename='03036_compressed_file_archive.zip :: example7.csv', format='CSV', structure='auto', compression_method='gz') ORDER BY (id, _file, _path) From f2244853164906cbf6faf186b7f3782ccc504d5a Mon Sep 17 00:00:00 2001 From: divanik Date: Tue, 2 Jul 2024 13:01:33 +0000 Subject: [PATCH 105/140] Add reference to documentation --- docs/en/sql-reference/table-functions/s3.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/en/sql-reference/table-functions/s3.md b/docs/en/sql-reference/table-functions/s3.md index 1a7e2b8d66a..35e5d86034c 100644 --- a/docs/en/sql-reference/table-functions/s3.md +++ b/docs/en/sql-reference/table-functions/s3.md @@ -269,9 +269,9 @@ FROM s3( ## Virtual Columns {#virtual-columns} -- `_path` — Path to the file. Type: `LowCardinalty(String)`. -- `_file` — Name of the file. Type: `LowCardinalty(String)`. -- `_size` — Size of the file in bytes. Type: `Nullable(UInt64)`. If the file size is unknown, the value is `NULL`. +- `_path` — Path to the file. Type: `LowCardinality(String)`. For a file inside an archive, the path has the format "{path_to_archive}::{path_to_file_inside_archive}". +- `_file` — Name of the file. Type: `LowCardinality(String)`. For a file inside an archive, this is the name of the file inside the archive. +- `_size` — Size of the file in bytes. Type: `Nullable(UInt64)`. If the file size is unknown, the value is `NULL`. For a file inside an archive, this is the uncompressed size of the file inside the archive. - `_time` — Last modified time of the file. Type: `Nullable(DateTime)`. If the time is unknown, the value is `NULL`.
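To make the archive semantics of these virtual columns concrete, here is a minimal sketch in the spirit of the 03036 test above; it assumes the same `s3_conn` named collection and `03036_archive1.zip` fixture that the test relies on.

```sql
-- Read one CSV member of a zip archive. _path takes the
-- "{path_to_archive}::{path_to_file_inside_archive}" form, _file is the
-- member name, and _size is the uncompressed size of the member file.
SELECT id, data, _size, _file, _path
FROM s3(s3_conn, filename='03036_archive1.zip :: example1.csv')
ORDER BY (id, _file, _path);
```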
## Storage Settings {#storage-settings} From bd47b8f3a11ffe582af6b678ab6330dfdbbb00ba Mon Sep 17 00:00:00 2001 From: Shaun Struwig <41984034+Blargian@users.noreply.github.com> Date: Tue, 2 Jul 2024 16:15:32 +0200 Subject: [PATCH 106/140] Update aspell-dict.txt --- utils/check-style/aspell-ignore/en/aspell-dict.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index 5c3991ed293..a6d9988f81f 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -977,6 +977,9 @@ TotalRowsOfMergeTreeTables TotalTemporaryFiles Tradeoff Transactional +transactionID +transactionLatestSnapshot +transactionOldestSnapshot Tsai Tukey TwoColumnList From f8c4d77aae321207ce2dc5920fd1e95b7985bdee Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 2 Jul 2024 14:17:19 +0000 Subject: [PATCH 107/140] Update aspell-dict --- .../aspell-ignore/en/aspell-dict.txt | 33 ++++++++++--------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index a6d9988f81f..239ade4d503 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -48,7 +48,6 @@ AutoML Autocompletion AvroConfluent BIGINT -bigrams BIGSERIAL BORO BSON @@ -223,7 +222,6 @@ DatabaseOrdinaryThreadsActive DateTime DateTimes DbCL -deallocated Decrypted Deduplicate Deduplication @@ -295,7 +293,6 @@ FilesystemMainPathUsedBytes FilesystemMainPathUsedINodes FixedString FlameGraph -flameGraph Flink ForEach FreeBSD @@ -977,9 +974,6 @@ TotalRowsOfMergeTreeTables TotalTemporaryFiles Tradeoff Transactional -transactionID -transactionLatestSnapshot -transactionOldestSnapshot Tsai Tukey TwoColumnList @@ -995,6 +989,8 @@ UPDATEs URIs URL URL's +URLDecode +URLEncode URLHash URLHierarchy URLPathHierarchy @@ -1012,13 +1008,10 @@ UncompressedCacheBytes UncompressedCacheCells UnidirectionalEdgeIsValid UniqThetaSketch -unigrams Updatable Uppercased Uptime Uptrace -URLDecode -URLEncode UserID Util VARCHAR @@ -1224,6 +1217,7 @@ basename bcrypt benchmarking bfloat +bigrams binlog bitAnd bitCount @@ -1473,6 +1467,7 @@ dbeaver dbgen dbms ddl +deallocated deallocation deallocations debian @@ -1512,11 +1507,11 @@ deserializing destructor destructors detectCharset -detectTonality detectLanguage detectLanguageMixed detectLanguageUnknown detectProgrammingLanguage +detectTonality determinator deterministically dictGet @@ -1532,8 +1527,8 @@ dictIsIn disableProtocols disjunction disjunctions -displaySecretsInShowAndSelect displayName +displaySecretsInShowAndSelect distro divideDecimal dmesg @@ -1583,11 +1578,11 @@ evalMLMethod exFAT expiryMsec exponentialMovingAverage -exponentialmovingaverage exponentialTimeDecayedAvg exponentialTimeDecayedCount exponentialTimeDecayedMax exponentialTimeDecayedSum +exponentialmovingaverage expr exprN extendedVerification @@ -1624,6 +1619,7 @@ firstSignificantSubdomainCustom firstSignificantSubdomainCustomRFC firstSignificantSubdomainRFC fixedstring +flameGraph flamegraph flatbuffers flattenTuple @@ -1806,8 +1802,8 @@ incrementing indexHint indexOf infi -infty inflight +infty initcap initcapUTF initialQueryID @@ -1955,9 +1951,9 @@ loghouse london lookups loongarch -lowcardinality lowCardinalityIndices lowCardinalityKeys +lowcardinality lowerUTF lowercased lttb @@ -2265,9 +2261,9 @@ proleptic prometheus 
proportionsZTest proto -protocol protobuf protobufsingle +protocol proxied pseudorandom pseudorandomize @@ -2758,6 +2754,12 @@ topLevelDomain topLevelDomainRFC topk topkweighted +transactionID +transactionID +transactionLatestSnapshot +transactionLatestSnapshot +transactionOldestSnapshot +transactionOldestSnapshot transactional transactionally translateUTF @@ -2811,6 +2813,7 @@ unescaping unhex unicode unidimensional +unigrams unintuitive uniq uniqCombined From 65eb46927839a42c32bf5db4054095fe7dae99a7 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 2 Jul 2024 14:18:33 +0000 Subject: [PATCH 108/140] Remove duplicates from aspell-dict --- utils/check-style/aspell-ignore/en/aspell-dict.txt | 3 --- 1 file changed, 3 deletions(-) diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index 239ade4d503..ea3dd642ee0 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -2755,10 +2755,7 @@ topLevelDomainRFC topk topkweighted transactionID -transactionID transactionLatestSnapshot -transactionLatestSnapshot -transactionOldestSnapshot transactionOldestSnapshot transactional transactionally From ad6ddf634db8318f48b8f5e95d17473a1e5ae3e3 Mon Sep 17 00:00:00 2001 From: jwoodhead Date: Tue, 2 Jul 2024 09:33:15 -0500 Subject: [PATCH 109/140] Include offset argument for lagInFrame and leadInFrame window functions. Fixes #65952 --- docs/en/sql-reference/window-functions/index.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/en/sql-reference/window-functions/index.md b/docs/en/sql-reference/window-functions/index.md index 3a8afd10359..530eaae7283 100644 --- a/docs/en/sql-reference/window-functions/index.md +++ b/docs/en/sql-reference/window-functions/index.md @@ -80,8 +80,8 @@ These functions can be used only as a window function. - `nth_value(x, offset)` - Return the first non-NULL value evaluated against the nth row (offset) in its ordered frame. - `rank()` - Rank the current row within its partition with gaps. - `dense_rank()` - Rank the current row within its partition without gaps. -- `lagInFrame(x)` - Return a value evaluated at the row that is at a specified physical offset row before the current row within the ordered frame. -- `leadInFrame(x)` - Return a value evaluated at the row that is offset rows after the current row within the ordered frame. +- `lagInFrame(x, offset)` - Return a value evaluated at the row that is at a specified physical offset row before the current row within the ordered frame. +- `leadInFrame(x, offset)` - Return a value evaluated at the row that is offset rows after the current row within the ordered frame. 
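A short, self-contained illustration of the offset argument documented above; the `values` input and the column names are hypothetical, chosen only to show the mechanics.

```sql
-- Pull the value from two rows earlier in the ordered frame; the first two
-- rows have no such predecessor and yield the column type's default (0).
SELECT
    day,
    value,
    lagInFrame(value, 2) OVER (ORDER BY day ASC
        ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS value_two_rows_back
FROM values('day Date, value UInt32',
    ('2024-01-01', 10), ('2024-01-02', 20), ('2024-01-03', 30));
```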
## Examples From 8dfa8d6df48e82e321641a239ccd715c4d188c62 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 2 Jul 2024 16:42:42 +0200 Subject: [PATCH 110/140] Add more Azure profile events + AzureUploadPart to AzureStageBlock --- src/Common/ProfileEvents.cpp | 8 +++++-- src/Coordination/KeeperConstants.cpp | 10 +++++++-- .../IO/WriteBufferFromAzureBlobStorage.cpp | 21 +++++++++++++++++++ .../copyAzureBlobStorageFile.cpp | 17 ++++++++++----- 4 files changed, 47 insertions(+), 9 deletions(-) diff --git a/src/Common/ProfileEvents.cpp b/src/Common/ProfileEvents.cpp index d98373b6c55..eaff2cf8856 100644 --- a/src/Common/ProfileEvents.cpp +++ b/src/Common/ProfileEvents.cpp @@ -447,14 +447,18 @@ The server successfully detected this situation and will download merged part fr M(QueryMemoryLimitExceeded, "Number of times when memory limit exceeded for query.") \ \ M(AzureGetObject, "Number of Azure API GetObject calls.") \ - M(AzureUploadPart, "Number of Azure blob storage API UploadPart calls") \ + M(AzureUpload, "Number of Azure blob storage API Upload calls") \ + M(AzureStageBlock, "Number of Azure blob storage API StageBlock calls") \ + M(AzureCommitBlockList, "Number of Azure blob storage API CommitBlockList calls") \ M(AzureCopyObject, "Number of Azure blob storage API CopyObject calls") \ M(AzureDeleteObjects, "Number of Azure blob storage API DeleteObject(s) calls.") \ M(AzureListObjects, "Number of Azure blob storage API ListObjects calls.") \ M(AzureGetProperties, "Number of Azure blob storage API GetProperties calls.") \ \ M(DiskAzureGetObject, "Number of Disk Azure API GetObject calls.") \ - M(DiskAzureUploadPart, "Number of Disk Azure blob storage API UploadPart calls") \ + M(DiskAzureUpload, "Number of Disk Azure blob storage API Upload calls") \ + M(DiskAzureStageBlock, "Number of Disk Azure blob storage API StageBlock calls") \ + M(DiskAzureCommitBlockList, "Number of Disk Azure blob storage API CommitBlockList calls") \ M(DiskAzureCopyObject, "Number of Disk Azure blob storage API CopyObject calls") \ M(DiskAzureListObjects, "Number of Disk Azure blob storage API ListObjects calls.") \ M(DiskAzureDeleteObjects, "Number of Azure blob storage API DeleteObject(s) calls.") \ diff --git a/src/Coordination/KeeperConstants.cpp b/src/Coordination/KeeperConstants.cpp index 51bf037c1c9..76541db6112 100644 --- a/src/Coordination/KeeperConstants.cpp +++ b/src/Coordination/KeeperConstants.cpp @@ -150,12 +150,18 @@ M(S3PutObject) \ M(S3GetObject) \ \ - M(AzureUploadPart) \ - M(DiskAzureUploadPart) \ + M(AzureUpload) \ + M(DiskAzureUpload) \ + M(AzureStageBlock) \ + M(DiskAzureStageBlock) \ + M(AzureCommitBlockList) \ + M(DiskAzureCommitBlockList) \ M(AzureCopyObject) \ M(DiskAzureCopyObject) \ M(AzureDeleteObjects) \ + M(DiskAzureDeleteObjects) \ M(AzureListObjects) \ + M(DiskAzureListObjects) \ \ M(DiskS3DeleteObjects) \ M(DiskS3CopyObject) \ diff --git a/src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp b/src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp index a2d21cf49c2..d1324e22978 100644 --- a/src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp +++ b/src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp @@ -14,6 +14,15 @@ namespace ProfileEvents { extern const Event RemoteWriteThrottlerBytes; extern const Event RemoteWriteThrottlerSleepMicroseconds; + + extern const Event AzureUpload; + extern const Event AzureStageBlock; + extern const Event AzureCommitBlockList; + + extern const Event DiskAzureUpload; + extern const Event DiskAzureStageBlock; + extern const Event DiskAzureCommitBlockList; + } 
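As an aside, counters registered via `ProfileEvents` surface in the `system.events` system table, so the instrumentation added in this commit can be verified at runtime; a sketch, assuming the server has already performed some Azure blob uploads:

```sql
-- Non-zero values confirm traffic through the Upload and
-- StageBlock/CommitBlockList paths instrumented above.
SELECT event, value, description
FROM system.events
WHERE event IN ('AzureUpload', 'AzureStageBlock', 'AzureCommitBlockList')
ORDER BY event;
```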
namespace DB @@ -134,6 +143,10 @@ void WriteBufferFromAzureBlobStorage::preFinalize() /// then we use single part upload instead of multi part upload if (block_ids.empty() && detached_part_data.size() == 1 && detached_part_data.front().data_size <= max_single_part_upload_size) { + ProfileEvents::increment(ProfileEvents::AzureUpload); + if (blob_container_client->GetClickhouseOptions().IsClientForDisk) + ProfileEvents::increment(ProfileEvents::DiskAzureUpload); + auto part_data = std::move(detached_part_data.front()); auto block_blob_client = blob_container_client->GetBlockBlobClient(blob_path); Azure::Core::IO::MemoryBodyStream memory_stream(reinterpret_cast(part_data.memory.data()), part_data.data_size); @@ -164,6 +177,10 @@ void WriteBufferFromAzureBlobStorage::finalizeImpl() if (!block_ids.empty()) { auto block_blob_client = blob_container_client->GetBlockBlobClient(blob_path); + ProfileEvents::increment(ProfileEvents::AzureCommitBlockList); + if (blob_container_client->GetClickhouseOptions().IsClientForDisk) + ProfileEvents::increment(ProfileEvents::DiskAzureCommitBlockList); + execWithRetry([&](){ block_blob_client.CommitBlockList(block_ids); }, max_unexpected_write_error_retries); LOG_TRACE(log, "Committed {} blocks for blob `{}`", block_ids.size(), blob_path); } @@ -269,6 +286,10 @@ void WriteBufferFromAzureBlobStorage::writePart(WriteBufferFromAzureBlobStorage: auto & data_block_id = std::get<0>(*worker_data); auto block_blob_client = blob_container_client->GetBlockBlobClient(blob_path); + ProfileEvents::increment(ProfileEvents::AzureStageBlock); + if (blob_container_client->GetClickhouseOptions().IsClientForDisk) + ProfileEvents::increment(ProfileEvents::DiskAzureStageBlock); + Azure::Core::IO::MemoryBodyStream memory_stream(reinterpret_cast(std::get<1>(*worker_data).memory.data()), data_size); execWithRetry([&](){ block_blob_client.StageBlock(data_block_id, memory_stream); }, max_unexpected_write_error_retries, data_size); }; diff --git a/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp b/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp index 8bd436f218c..43052f661b3 100644 --- a/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp +++ b/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp @@ -16,10 +16,14 @@ namespace ProfileEvents { extern const Event AzureCopyObject; - extern const Event AzureUploadPart; + extern const Event AzureUpload; + extern const Event AzureStageBlock; + extern const Event AzureCommitBlockList; extern const Event DiskAzureCopyObject; - extern const Event DiskAzureUploadPart; + extern const Event DiskAzureUpload; + extern const Event DiskAzureStageBlock; + extern const Event DiskAzureCommitBlockList; } @@ -156,6 +160,10 @@ namespace void completeMultipartUpload() { auto block_blob_client = client->GetBlockBlobClient(dest_blob); + ProfileEvents::increment(ProfileEvents::AzureCommitBlockList); + if (client->GetClickhouseOptions().IsClientForDisk) + ProfileEvents::increment(ProfileEvents::DiskAzureCommitBlockList); + block_blob_client.CommitBlockList(block_ids); } @@ -259,9 +267,9 @@ namespace void processUploadPartRequest(UploadPartTask & task) { - ProfileEvents::increment(ProfileEvents::AzureUploadPart); + ProfileEvents::increment(ProfileEvents::AzureStageBlock); if (client->GetClickhouseOptions().IsClientForDisk) - ProfileEvents::increment(ProfileEvents::DiskAzureUploadPart); + ProfileEvents::increment(ProfileEvents::DiskAzureStageBlock); auto block_blob_client = client->GetBlockBlobClient(dest_blob); auto read_buffer = 
std::make_unique(create_read_buffer(), task.part_offset, task.part_size); @@ -333,7 +341,6 @@ void copyAzureBlobStorageFile( const ReadSettings & read_settings, ThreadPoolCallbackRunnerUnsafe schedule) { - if (settings->use_native_copy) { LOG_TRACE(getLogger("copyAzureBlobStorageFile"), "Copying Blob: {} from Container: {} using native copy", src_container_for_logging, src_blob); From 4ac30aa7d578acf00928ac2301fa7b50da9a040f Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 2 Jul 2024 17:18:14 +0200 Subject: [PATCH 111/140] Fix style --- src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp b/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp index 43052f661b3..6386c7a3c76 100644 --- a/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp +++ b/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp @@ -16,12 +16,10 @@ namespace ProfileEvents { extern const Event AzureCopyObject; - extern const Event AzureUpload; extern const Event AzureStageBlock; extern const Event AzureCommitBlockList; extern const Event DiskAzureCopyObject; - extern const Event DiskAzureUpload; extern const Event DiskAzureStageBlock; extern const Event DiskAzureCommitBlockList; } From b64c1dc711b3f52bfef9f05b13812acd6d683244 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Tue, 2 Jul 2024 17:20:29 +0200 Subject: [PATCH 112/140] Update index.md --- docs/en/sql-reference/window-functions/index.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/en/sql-reference/window-functions/index.md b/docs/en/sql-reference/window-functions/index.md index 530eaae7283..01fae9d9040 100644 --- a/docs/en/sql-reference/window-functions/index.md +++ b/docs/en/sql-reference/window-functions/index.md @@ -80,8 +80,8 @@ These functions can be used only as a window function. - `nth_value(x, offset)` - Return the first non-NULL value evaluated against the nth row (offset) in its ordered frame. - `rank()` - Rank the current row within its partition with gaps. - `dense_rank()` - Rank the current row within its partition without gaps. -- `lagInFrame(x, offset)` - Return a value evaluated at the row that is at a specified physical offset row before the current row within the ordered frame. -- `leadInFrame(x, offset)` - Return a value evaluated at the row that is offset rows after the current row within the ordered frame. +- `lagInFrame(x[, offset[, default]])` - Return a value from the column x evaluated at the row that is a specified physical offset ahead of the current row within the ordered frame. The offset parameter, if not specified, defaults to 1, meaning it will fetch the value from the next row. If the calculated row exceeds the boundaries of the window frame, the default value is returned if specified; otherwise, a default value based on the column’s data type is used. +- `leadInFrame(x[, offset[, default]])` - Return a value from the column x evaluated at the row that is a specified number of offset rows ahead of the current row within the ordered frame. If offset is not provided, it defaults to 1. If the offset leads to a position outside the window frame, the default value is used if specified; otherwise, the function returns a default value based on the column’s data type.
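The `default` argument described in this revision can be demonstrated with a small query; `numbers(5)` and the casts are illustrative only.

```sql
-- The last row has no successor inside the frame, so it returns the
-- explicit default -1 instead of the Int64 zero value.
SELECT
    number,
    leadInFrame(toInt64(number), 1, toInt64(-1)) OVER (ORDER BY number ASC
        ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS next_number
FROM numbers(5);
```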
## Examples From 2598daa65aab4d58fae1cc5c69ebe9257c189f6b Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Tue, 2 Jul 2024 17:29:48 +0200 Subject: [PATCH 113/140] small fix of docs --- docs/en/sql-reference/window-functions/index.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/en/sql-reference/window-functions/index.md b/docs/en/sql-reference/window-functions/index.md index 01fae9d9040..49076f3cbe1 100644 --- a/docs/en/sql-reference/window-functions/index.md +++ b/docs/en/sql-reference/window-functions/index.md @@ -80,8 +80,8 @@ These functions can be used only as a window function. - `nth_value(x, offset)` - Return the first non-NULL value evaluated against the nth row (offset) in its ordered frame. - `rank()` - Rank the current row within its partition with gaps. - `dense_rank()` - Rank the current row within its partition without gaps. -- `lagInFrame(x[, offset[, default]])` - Return a value from the column x evaluated at the row that is a specified physical offset ahead of the current row within the ordered frame. The offset parameter, if not specified, defaults to 1, meaning it will fetch the value from the next row. If the calculated row exceeds the boundaries of the window frame, the default value is returned if specified; otherwise, a default value based on the column’s data type is used. -- `leadInFrame(x[, offset[, default]])` - Return a value from the column x evaluated at the row that is a specified number of offset rows ahead of the current row within the ordered frame. If offset is not provided, it defaults to 1. If the offset leads to a position outside the window frame, the default value is used if specified; otherwise, the function returns a default value based on the column’s data type. +- `lagInFrame(x[, offset[, default]])` - Return a value evaluated at the row that is at a specified physical offset row before the current row within the ordered frame. The offset parameter, if not specified, defaults to 1, meaning it will fetch the value from the previous row. If the calculated row exceeds the boundaries of the window frame, the specified default value is returned. +- `leadInFrame(x[, offset[, default]])` - Return a value evaluated at the row that is offset rows after the current row within the ordered frame. If offset is not provided, it defaults to 1. If the offset leads to a position outside the window frame, the specified default value is used. ## Examples From b5af73a299986c457ba42f59c0d39a53ab4d9053 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Tue, 2 Jul 2024 15:48:10 +0000 Subject: [PATCH 114/140] Better --- src/Client/ClientBase.cpp | 7 ++----- src/Client/ClientBase.h | 4 ++-- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 56573c15f32..5d472ba99b9 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -1206,11 +1206,8 @@ void ClientBase::receiveResult(ASTPtr parsed_query, Int32 signals_before_stop, b if (local_format_error) std::rethrow_exception(local_format_error); - if (cancelled && is_interactive) - { + if (cancelled && is_interactive && !cancelled_printed.exchange(true)) output_stream << "Query was cancelled." << std::endl; - cancelled_printed = true; - } } @@ -1326,7 +1323,7 @@ void ClientBase::onEndOfStream() if (is_interactive) { - if (cancelled && !cancelled_printed) + if (cancelled && !cancelled_printed.exchange(true)) output_stream << "Query was cancelled."
<< std::endl; else if (!written_first_block) output_stream << "Ok." << std::endl; diff --git a/src/Client/ClientBase.h b/src/Client/ClientBase.h index 756400137ad..30dc4168996 100644 --- a/src/Client/ClientBase.h +++ b/src/Client/ClientBase.h @@ -338,8 +338,8 @@ protected: bool allow_repeated_settings = false; bool allow_merge_tree_settings = false; - bool cancelled = false; - bool cancelled_printed = false; + std::atomic_bool cancelled = false; + std::atomic_bool cancelled_printed = false; /// Unpacked descriptors and streams for the ease of use. int in_fd = STDIN_FILENO; From 0ed34661243e918a81d8823dbec8917ef88ab3b2 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 2 Jul 2024 15:30:16 +0000 Subject: [PATCH 115/140] Cleanup FunctionArgumentDescriptor const char * can be nullptr, std::string_view can't. --- src/Functions/FunctionHelpers.cpp | 33 ++++--- src/Functions/FunctionHelpers.h | 85 ++++++++----------- src/Functions/FunctionStringReplace.h | 2 +- src/Functions/FunctionTokens.h | 4 +- src/Functions/FunctionUnixTimestamp64.h | 2 +- src/Functions/FunctionsAES.h | 4 +- src/Functions/FunctionsConversion.cpp | 6 +- src/Functions/FunctionsRound.h | 2 +- src/Functions/JSONArrayLength.cpp | 2 +- src/Functions/URL/URLHierarchy.cpp | 2 +- src/Functions/URL/URLPathHierarchy.cpp | 2 +- .../URL/extractURLParameterNames.cpp | 2 +- src/Functions/URL/extractURLParameters.cpp | 2 +- src/Functions/array/arrayJaccardIndex.cpp | 2 +- src/Functions/array/arrayRandomSample.cpp | 2 +- src/Functions/array/arrayShingles.cpp | 2 +- src/Functions/arrayStringConcat.cpp | 2 +- src/Functions/castOrDefault.cpp | 6 +- src/Functions/countMatches.h | 2 +- src/Functions/dateTimeToSnowflakeID.cpp | 4 +- src/Functions/extractAll.cpp | 2 +- src/Functions/extractAllGroups.h | 2 +- src/Functions/extractGroups.cpp | 2 +- src/Functions/formatQuery.cpp | 2 +- src/Functions/fromDaysSinceYearZero.cpp | 2 +- src/Functions/generateSnowflakeID.cpp | 2 +- src/Functions/generateUUIDv4.cpp | 4 +- src/Functions/generateUUIDv7.cpp | 2 +- src/Functions/makeDate.cpp | 14 +-- src/Functions/parseDateTime.cpp | 2 +- src/Functions/parseReadableSize.cpp | 2 +- src/Functions/regexpExtract.cpp | 2 +- src/Functions/repeat.cpp | 2 +- src/Functions/seriesDecomposeSTL.cpp | 2 +- src/Functions/seriesOutliersDetectTukey.cpp | 2 +- src/Functions/snowflake.cpp | 8 +- src/Functions/snowflakeIDToDateTime.cpp | 4 +- src/Functions/space.cpp | 2 +- src/Functions/timestamp.cpp | 2 +- src/Functions/toDecimalString.cpp | 2 +- 40 files changed, 108 insertions(+), 122 deletions(-) diff --git a/src/Functions/FunctionHelpers.cpp b/src/Functions/FunctionHelpers.cpp index 593646240ca..0027f9f281f 100644 --- a/src/Functions/FunctionHelpers.cpp +++ b/src/Functions/FunctionHelpers.cpp @@ -97,7 +97,7 @@ ColumnsWithTypeAndName createBlockWithNestedColumns(const ColumnsWithTypeAndName void validateArgumentType(const IFunction & func, const DataTypes & arguments, size_t argument_index, bool (* validator_func)(const IDataType &), - const char * expected_type_description) + const char * type_name) { if (arguments.size() <= argument_index) throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Incorrect number of arguments of function {}", @@ -106,7 +106,7 @@ void validateArgumentType(const IFunction & func, const DataTypes & arguments, const auto & argument = arguments[argument_index]; if (!validator_func(*argument)) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of {} argument of function {}, expected {}", - argument->getName(), 
std::to_string(argument_index), func.getName(), expected_type_description); + argument->getName(), argument_index, func.getName(), type_name); } namespace @@ -120,9 +120,7 @@ void validateArgumentsImpl(const IFunction & func, { const auto argument_index = i + argument_offset; if (argument_index >= arguments.size()) - { break; - } const auto & arg = arguments[i + argument_offset]; const auto & descriptor = descriptors[i]; @@ -130,10 +128,10 @@ void validateArgumentsImpl(const IFunction & func, throw Exception(error_code, "Illegal type of argument #{}{} of function {}{}{}", argument_offset + i + 1, // +1 is for human-friendly 1-based indexing - (descriptor.argument_name ? " '" + std::string(descriptor.argument_name) + "'" : String{}), + " '" + String(descriptor.name) + "'", func.getName(), - (descriptor.expected_type_description ? String(", expected ") + descriptor.expected_type_description : String{}), - (arg.type ? ", got " + arg.type->getName() : String{})); + String(", expected ") + String(descriptor.type_name), + arg.type ? ", got " + arg.type->getName() : String{}); } } @@ -141,19 +139,22 @@ void validateArgumentsImpl(const IFunction & func, int FunctionArgumentDescriptor::isValid(const DataTypePtr & data_type, const ColumnPtr & column) const { - if (type_validator_func && (data_type == nullptr || !type_validator_func(*data_type))) + if (name.empty() || type_name.empty()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "name or type_name are not set"); + + if (type_validator && (data_type == nullptr || !type_validator(*data_type))) return ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT; - if (column_validator_func && (column == nullptr || !column_validator_func(*column))) + if (column_validator && (column == nullptr || !column_validator(*column))) return ErrorCodes::ILLEGAL_COLUMN; return 0; } -void validateFunctionArgumentTypes(const IFunction & func, - const ColumnsWithTypeAndName & arguments, - const FunctionArgumentDescriptors & mandatory_args, - const FunctionArgumentDescriptors & optional_args) +void validateFunctionArguments(const IFunction & func, + const ColumnsWithTypeAndName & arguments, + const FunctionArgumentDescriptors & mandatory_args, + const FunctionArgumentDescriptors & optional_args) { if (arguments.size() < mandatory_args.size() || arguments.size() > mandatory_args.size() + optional_args.size()) { @@ -165,10 +166,8 @@ void validateFunctionArgumentTypes(const IFunction & func, using A = std::decay_t; if constexpr (std::is_same_v) { - if (a.argument_name) - result += "'" + std::string(a.argument_name) + "' : "; - if (a.expected_type_description) - result += a.expected_type_description; + result += "'" + String(a.name) + "' : "; + result += a.type_name; } else if constexpr (std::is_same_v) result += a.type->getName(); diff --git a/src/Functions/FunctionHelpers.h b/src/Functions/FunctionHelpers.h index 6267d8eacc4..c08eb5265c1 100644 --- a/src/Functions/FunctionHelpers.h +++ b/src/Functions/FunctionHelpers.h @@ -119,73 +119,60 @@ ColumnsWithTypeAndName createBlockWithNestedColumns(const ColumnsWithTypeAndName /// throws if there is no argument at specified index or if predicate returns false. 
void validateArgumentType(const IFunction & func, const DataTypes & arguments, size_t argument_index, bool (* validator_func)(const IDataType &), - const char * expected_type_description); + const char * type_name); -/** Simple validator that is used in conjunction with validateFunctionArgumentTypes() to check if function arguments are as expected - * - * Also it is used to generate function description when arguments do not match expected ones. - * Any field can be null: - * `argument_name` - if not null, reported via type check errors. - * `expected_type_description` - if not null, reported via type check errors. - * `type_validator_func` - if not null, used to validate data type of function argument. - * `column_validator_func` - if not null, used to validate column of function argument. - */ +/// Expected arguments for a function. Can be used in conjunction with validateFunctionArguments() to check that the user-provided +/// arguments match the expected arguments. struct FunctionArgumentDescriptor { - const char * argument_name; + /// The argument name, e.g. "longitude". + /// Should not be empty. + std::string_view name; + /// A function which validates the argument data type. + /// May be nullptr. using TypeValidator = bool (*)(const IDataType &); - TypeValidator type_validator_func; + TypeValidator type_validator; + + /// A function which validates the argument column. + /// May be nullptr. using ColumnValidator = bool (*)(const IColumn &); - ColumnValidator column_validator_func; + ColumnValidator column_validator; - const char * expected_type_description; + /// The expected argument type, e.g. "const String" or "UInt64". + /// Should not be empty. + std::string_view type_name; - /** Validate argument type and column. - * - * Returns non-zero error code if: - * Validator != nullptr && (Value == nullptr || Validator(*Value) == false) - * For: - * Validator is either `type_validator_func` or `column_validator_func` - * Value is either `data_type` or `column` respectively. - * ILLEGAL_TYPE_OF_ARGUMENT if type validation fails - * - */ + /// Validate argument type and column. int isValid(const DataTypePtr & data_type, const ColumnPtr & column) const; }; using FunctionArgumentDescriptors = std::vector; -/** Validate that function arguments match specification. - * - * Designed to simplify argument validation for functions with variable arguments - * (e.g. depending on result type or other trait). - * First, checks that number of arguments is as expected (including optional arguments). - * Second, checks that mandatory args present and have valid type. - * Third, checks optional arguments types, skipping ones that are missing. - * - * Please note that if you have several optional arguments, like f([a, b, c]), - * only these calls are considered valid: - * f(a) - * f(a, b) - * f(a, b, c) - * - * But NOT these: f(a, c), f(b, c) - * In other words you can't omit middle optional arguments (just like in regular C++). - * - * If any mandatory arg is missing, throw an exception, with explicit description of expected arguments. - */ -void validateFunctionArgumentTypes(const IFunction & func, const ColumnsWithTypeAndName & arguments, - const FunctionArgumentDescriptors & mandatory_args, - const FunctionArgumentDescriptors & optional_args = {}); +/// Validates that the user-provided arguments match the expected arguments. 
+/// +/// Checks that +/// - the number of provided arguments matches the number of mandatory/optional arguments, +/// - all mandatory arguments are present and have the right type, +/// - optional arguments - if present - have the right type. +/// +/// With multiple optional arguments, e.g. f([a, b, c]), provided arguments must match left-to-right. E.g. these calls are considered valid: +/// f(a) +/// f(a, b) +/// f(a, b, c) +/// but these are NOT: +/// f(a, c) +/// f(b, c) +void validateFunctionArguments(const IFunction & func, const ColumnsWithTypeAndName & arguments, + const FunctionArgumentDescriptors & mandatory_args, + const FunctionArgumentDescriptors & optional_args = {}); /// Checks if a list of array columns have equal offsets. Return a pair of nested columns and offsets if true, otherwise throw. std::pair, const ColumnArray::Offset *> checkAndGetNestedArrayOffset(const IColumn ** columns, size_t num_arguments); -/** Return ColumnNullable of src, with null map as OR-ed null maps of args columns. - * Or ColumnConst(ColumnNullable) if the result is always NULL or if the result is constant and always not NULL. - */ +/// Return ColumnNullable of src, with null map as OR-ed null maps of args columns. +/// Or ColumnConst(ColumnNullable) if the result is always NULL or if the result is constant and always not NULL. ColumnPtr wrapInNullable(const ColumnPtr & src, const ColumnsWithTypeAndName & args, const DataTypePtr & result_type, size_t input_rows_count); struct NullPresence diff --git a/src/Functions/FunctionStringReplace.h b/src/Functions/FunctionStringReplace.h index aee04a5969a..b4bcfa514a8 100644 --- a/src/Functions/FunctionStringReplace.h +++ b/src/Functions/FunctionStringReplace.h @@ -40,7 +40,7 @@ public: {"replacement", static_cast(&isString), nullptr, "String"} }; - validateFunctionArgumentTypes(*this, arguments, args); + validateFunctionArguments(*this, arguments, args); return std::make_shared(); } diff --git a/src/Functions/FunctionTokens.h b/src/Functions/FunctionTokens.h index d6cf6a24983..0ca47126198 100644 --- a/src/Functions/FunctionTokens.h +++ b/src/Functions/FunctionTokens.h @@ -194,7 +194,7 @@ static inline void checkArgumentsWithSeparatorAndOptionalMaxSubstrings( {"max_substrings", static_cast(&isNativeInteger), isColumnConst, "const Number"}, }; - validateFunctionArgumentTypes(func, arguments, mandatory_args, optional_args); + validateFunctionArguments(func, arguments, mandatory_args, optional_args); } static inline void checkArgumentsWithOptionalMaxSubstrings(const IFunction & func, const ColumnsWithTypeAndName & arguments) @@ -207,7 +207,7 @@ static inline void checkArgumentsWithOptionalMaxSubstrings(const IFunction & fun {"max_substrings", static_cast(&isNativeInteger), isColumnConst, "const Number"}, }; - validateFunctionArgumentTypes(func, arguments, mandatory_args, optional_args); + validateFunctionArguments(func, arguments, mandatory_args, optional_args); } } diff --git a/src/Functions/FunctionUnixTimestamp64.h b/src/Functions/FunctionUnixTimestamp64.h index c418163343b..e282bcfbfe2 100644 --- a/src/Functions/FunctionUnixTimestamp64.h +++ b/src/Functions/FunctionUnixTimestamp64.h @@ -47,7 +47,7 @@ public: FunctionArgumentDescriptors args{ {"value", static_cast(&isDateTime64), nullptr, "DateTime64"} }; - validateFunctionArgumentTypes(*this, arguments, args); + validateFunctionArguments(*this, arguments, args); return std::make_shared(); } diff --git a/src/Functions/FunctionsAES.h b/src/Functions/FunctionsAES.h index 524b4f82acd..7af6265eba9 100644 --- 
a/src/Functions/FunctionsAES.h +++ b/src/Functions/FunctionsAES.h @@ -165,7 +165,7 @@ private: }); } - validateFunctionArgumentTypes(*this, arguments, + validateFunctionArguments(*this, arguments, FunctionArgumentDescriptors{ {"mode", static_cast(&isStringOrFixedString), isColumnConst, "encryption mode string"}, {"input", static_cast(&isStringOrFixedString), {}, "plaintext"}, @@ -438,7 +438,7 @@ private: }); } - validateFunctionArgumentTypes(*this, arguments, + validateFunctionArguments(*this, arguments, FunctionArgumentDescriptors{ {"mode", static_cast(&isStringOrFixedString), isColumnConst, "decryption mode string"}, {"input", static_cast(&isStringOrFixedString), {}, "ciphertext"}, diff --git a/src/Functions/FunctionsConversion.cpp b/src/Functions/FunctionsConversion.cpp index 2a0b2f1d075..f3e54d2fbd9 100644 --- a/src/Functions/FunctionsConversion.cpp +++ b/src/Functions/FunctionsConversion.cpp @@ -2020,7 +2020,7 @@ public: DataTypePtr getReturnTypeImplRemovedNullable(const ColumnsWithTypeAndName & arguments) const { - FunctionArgumentDescriptors mandatory_args = {{"Value", nullptr, nullptr, nullptr}}; + FunctionArgumentDescriptors mandatory_args = {{"Value", nullptr, nullptr, "any type"}}; FunctionArgumentDescriptors optional_args; if constexpr (to_decimal) @@ -2049,7 +2049,7 @@ public: optional_args.push_back({"timezone", static_cast(&isString), nullptr, "String"}); } - validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); + validateFunctionArguments(*this, arguments, mandatory_args, optional_args); if constexpr (std::is_same_v) { @@ -2390,7 +2390,7 @@ public: if (isDateTime64(arguments)) { - validateFunctionArgumentTypes(*this, arguments, + validateFunctionArguments(*this, arguments, FunctionArgumentDescriptors{{"string", static_cast(&isStringOrFixedString), nullptr, "String or FixedString"}}, // optional FunctionArgumentDescriptors{ diff --git a/src/Functions/FunctionsRound.h b/src/Functions/FunctionsRound.h index 08e257de8ac..7a907e56a7d 100644 --- a/src/Functions/FunctionsRound.h +++ b/src/Functions/FunctionsRound.h @@ -647,7 +647,7 @@ public: FunctionArgumentDescriptors optional_args{ {"N", static_cast(&isNativeInteger), nullptr, "The number of decimal places to round to"}, }; - validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); + validateFunctionArguments(*this, arguments, mandatory_args, optional_args); return arguments[0].type; } diff --git a/src/Functions/JSONArrayLength.cpp b/src/Functions/JSONArrayLength.cpp index 84e87061398..73dd55f1266 100644 --- a/src/Functions/JSONArrayLength.cpp +++ b/src/Functions/JSONArrayLength.cpp @@ -48,7 +48,7 @@ namespace {"json", static_cast(&isString), nullptr, "String"}, }; - validateFunctionArgumentTypes(*this, arguments, args); + validateFunctionArguments(*this, arguments, args); return std::make_shared(std::make_shared()); } diff --git a/src/Functions/URL/URLHierarchy.cpp b/src/Functions/URL/URLHierarchy.cpp index c08f41f06ee..0f565df8172 100644 --- a/src/Functions/URL/URLHierarchy.cpp +++ b/src/Functions/URL/URLHierarchy.cpp @@ -32,7 +32,7 @@ public: {"URL", static_cast(&isString), nullptr, "String"}, }; - validateFunctionArgumentTypes(func, arguments, mandatory_args); + validateFunctionArguments(func, arguments, mandatory_args); } static constexpr auto strings_argument_position = 0uz; diff --git a/src/Functions/URL/URLPathHierarchy.cpp b/src/Functions/URL/URLPathHierarchy.cpp index 7c796116b8d..2cb5995e375 100644 --- a/src/Functions/URL/URLPathHierarchy.cpp +++ 
b/src/Functions/URL/URLPathHierarchy.cpp @@ -30,7 +30,7 @@ public: {"URL", static_cast(&isString), nullptr, "String"}, }; - validateFunctionArgumentTypes(func, arguments, mandatory_args); + validateFunctionArguments(func, arguments, mandatory_args); } static constexpr auto strings_argument_position = 0uz; diff --git a/src/Functions/URL/extractURLParameterNames.cpp b/src/Functions/URL/extractURLParameterNames.cpp index 16ace36d39b..b3d51d02162 100644 --- a/src/Functions/URL/extractURLParameterNames.cpp +++ b/src/Functions/URL/extractURLParameterNames.cpp @@ -30,7 +30,7 @@ public: {"URL", static_cast(&isString), nullptr, "String"}, }; - validateFunctionArgumentTypes(func, arguments, mandatory_args); + validateFunctionArguments(func, arguments, mandatory_args); } static constexpr auto strings_argument_position = 0uz; diff --git a/src/Functions/URL/extractURLParameters.cpp b/src/Functions/URL/extractURLParameters.cpp index 43079834872..ce2aadaeede 100644 --- a/src/Functions/URL/extractURLParameters.cpp +++ b/src/Functions/URL/extractURLParameters.cpp @@ -31,7 +31,7 @@ public: {"URL", static_cast(&isString), nullptr, "String"}, }; - validateFunctionArgumentTypes(func, arguments, mandatory_args); + validateFunctionArguments(func, arguments, mandatory_args); } void init(const ColumnsWithTypeAndName & /*arguments*/, bool /*max_substrings_includes_remaining_string*/) {} diff --git a/src/Functions/array/arrayJaccardIndex.cpp b/src/Functions/array/arrayJaccardIndex.cpp index 87f3390ac73..7db20667888 100644 --- a/src/Functions/array/arrayJaccardIndex.cpp +++ b/src/Functions/array/arrayJaccardIndex.cpp @@ -87,7 +87,7 @@ public: {"array_1", static_cast(&isArray), nullptr, "Array"}, {"array_2", static_cast(&isArray), nullptr, "Array"}, }; - validateFunctionArgumentTypes(*this, arguments, args); + validateFunctionArguments(*this, arguments, args); return std::make_shared>(); } diff --git a/src/Functions/array/arrayRandomSample.cpp b/src/Functions/array/arrayRandomSample.cpp index b08a73b93f3..6e176b6e33d 100644 --- a/src/Functions/array/arrayRandomSample.cpp +++ b/src/Functions/array/arrayRandomSample.cpp @@ -39,7 +39,7 @@ public: {"array", static_cast(&isArray), nullptr, "Array"}, {"samples", static_cast(&isUInt), isColumnConst, "const UInt*"}, }; - validateFunctionArgumentTypes(*this, arguments, args); + validateFunctionArguments(*this, arguments, args); // Return an array with the same nested type as the input array const DataTypePtr & array_type = arguments[0].type; diff --git a/src/Functions/array/arrayShingles.cpp b/src/Functions/array/arrayShingles.cpp index 8932482c69c..7c97d8136fb 100644 --- a/src/Functions/array/arrayShingles.cpp +++ b/src/Functions/array/arrayShingles.cpp @@ -31,7 +31,7 @@ public: {"array", static_cast(&isArray), nullptr, "Array"}, {"length", static_cast(&isInteger), nullptr, "Integer"} }; - validateFunctionArgumentTypes(*this, arguments, args); + validateFunctionArguments(*this, arguments, args); const DataTypeArray * array_type = checkAndGetDataType(arguments[0].type.get()); return std::make_shared(std::make_shared(array_type->getNestedType())); diff --git a/src/Functions/arrayStringConcat.cpp b/src/Functions/arrayStringConcat.cpp index 421408c01f2..12bab410fec 100644 --- a/src/Functions/arrayStringConcat.cpp +++ b/src/Functions/arrayStringConcat.cpp @@ -159,7 +159,7 @@ public: {"separator", static_cast(&isString), isColumnConst, "const String"}, }; - validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); + validateFunctionArguments(*this, arguments, 
mandatory_args, optional_args); return std::make_shared(); } diff --git a/src/Functions/castOrDefault.cpp b/src/Functions/castOrDefault.cpp index 44b39811882..995b5fa91e7 100644 --- a/src/Functions/castOrDefault.cpp +++ b/src/Functions/castOrDefault.cpp @@ -203,7 +203,7 @@ private: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { - FunctionArgumentDescriptors mandatory_args = {{"Value", nullptr, nullptr, nullptr}}; + FunctionArgumentDescriptors mandatory_args = {{"Value", nullptr, nullptr, "any type"}}; FunctionArgumentDescriptors optional_args; if (isDecimal(type) || isDateTime64(type)) @@ -212,9 +212,9 @@ private: if (isDateTimeOrDateTime64(type)) optional_args.push_back({"timezone", static_cast(&isString), isColumnConst, "const String"}); - optional_args.push_back({"default_value", nullptr, nullptr, nullptr}); + optional_args.push_back({"default_value", nullptr, nullptr, "any type"}); - validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); + validateFunctionArguments(*this, arguments, mandatory_args, optional_args); size_t additional_argument_index = 1; diff --git a/src/Functions/countMatches.h b/src/Functions/countMatches.h index fbbb9d017ee..5f07b936e26 100644 --- a/src/Functions/countMatches.h +++ b/src/Functions/countMatches.h @@ -38,7 +38,7 @@ public: {"haystack", static_cast(&isStringOrFixedString), nullptr, "String or FixedString"}, {"pattern", static_cast(&isString), isColumnConst, "constant String"} }; - validateFunctionArgumentTypes(*this, arguments, args); + validateFunctionArguments(*this, arguments, args); return std::make_shared(); } diff --git a/src/Functions/dateTimeToSnowflakeID.cpp b/src/Functions/dateTimeToSnowflakeID.cpp index 968a7628ca5..c48f8c13152 100644 --- a/src/Functions/dateTimeToSnowflakeID.cpp +++ b/src/Functions/dateTimeToSnowflakeID.cpp @@ -43,7 +43,7 @@ public: FunctionArgumentDescriptors optional_args{ {"epoch", static_cast(&isNativeUInt), isColumnConst, "const UInt*"} }; - validateFunctionArgumentTypes(*this, arguments, args, optional_args); + validateFunctionArguments(*this, arguments, args, optional_args); return std::make_shared(); } @@ -91,7 +91,7 @@ public: FunctionArgumentDescriptors optional_args{ {"epoch", static_cast(&isNativeUInt), isColumnConst, "const UInt*"} }; - validateFunctionArgumentTypes(*this, arguments, args, optional_args); + validateFunctionArguments(*this, arguments, args, optional_args); return std::make_shared(); } diff --git a/src/Functions/extractAll.cpp b/src/Functions/extractAll.cpp index 5801a7b8f4f..4a3eb32474c 100644 --- a/src/Functions/extractAll.cpp +++ b/src/Functions/extractAll.cpp @@ -59,7 +59,7 @@ public: {"pattern", static_cast(&isString), isColumnConst, "const String"} }; - validateFunctionArgumentTypes(func, arguments, mandatory_args); + validateFunctionArguments(func, arguments, mandatory_args); } static constexpr auto strings_argument_position = 0uz; diff --git a/src/Functions/extractAllGroups.h b/src/Functions/extractAllGroups.h index dfcd0e31715..7732855b211 100644 --- a/src/Functions/extractAllGroups.h +++ b/src/Functions/extractAllGroups.h @@ -74,7 +74,7 @@ public: {"haystack", static_cast(&isStringOrFixedString), nullptr, "const String or const FixedString"}, {"needle", static_cast(&isStringOrFixedString), isColumnConst, "const String or const FixedString"}, }; - validateFunctionArgumentTypes(*this, arguments, args); + validateFunctionArguments(*this, arguments, args); /// Two-dimensional array of strings, each `row` of top array represents 
matching groups. return std::make_shared(std::make_shared(std::make_shared())); diff --git a/src/Functions/extractGroups.cpp b/src/Functions/extractGroups.cpp index f62352af0bd..ac6266a2e82 100644 --- a/src/Functions/extractGroups.cpp +++ b/src/Functions/extractGroups.cpp @@ -48,7 +48,7 @@ public: {"haystack", static_cast(&isStringOrFixedString), nullptr, "const String or const FixedString"}, {"needle", static_cast(&isStringOrFixedString), isColumnConst, "const String or const FixedString"}, }; - validateFunctionArgumentTypes(*this, arguments, args); + validateFunctionArguments(*this, arguments, args); return std::make_shared(std::make_shared()); } diff --git a/src/Functions/formatQuery.cpp b/src/Functions/formatQuery.cpp index 3b632147864..655ea2e7cde 100644 --- a/src/Functions/formatQuery.cpp +++ b/src/Functions/formatQuery.cpp @@ -54,7 +54,7 @@ public: FunctionArgumentDescriptors args{ {"query", static_cast(&isString), nullptr, "String"} }; - validateFunctionArgumentTypes(*this, arguments, args); + validateFunctionArguments(*this, arguments, args); DataTypePtr string_type = std::make_shared(); if (error_handling == ErrorHandling::Null) diff --git a/src/Functions/fromDaysSinceYearZero.cpp b/src/Functions/fromDaysSinceYearZero.cpp index b98c587d172..0543e6bf229 100644 --- a/src/Functions/fromDaysSinceYearZero.cpp +++ b/src/Functions/fromDaysSinceYearZero.cpp @@ -54,7 +54,7 @@ public: { FunctionArgumentDescriptors args{{"days", static_cast(&isNativeInteger), nullptr, "Integer"}}; - validateFunctionArgumentTypes(*this, arguments, args); + validateFunctionArguments(*this, arguments, args); return std::make_shared(); } diff --git a/src/Functions/generateSnowflakeID.cpp b/src/Functions/generateSnowflakeID.cpp index 8ac010deafc..a171b6bf86e 100644 --- a/src/Functions/generateSnowflakeID.cpp +++ b/src/Functions/generateSnowflakeID.cpp @@ -167,7 +167,7 @@ public: FunctionArgumentDescriptors optional_args{ {"expr", nullptr, nullptr, "Arbitrary expression"} }; - validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); + validateFunctionArguments(*this, arguments, mandatory_args, optional_args); return std::make_shared(); } diff --git a/src/Functions/generateUUIDv4.cpp b/src/Functions/generateUUIDv4.cpp index b0fec43fe94..a928f9009c8 100644 --- a/src/Functions/generateUUIDv4.cpp +++ b/src/Functions/generateUUIDv4.cpp @@ -30,9 +30,9 @@ public: { FunctionArgumentDescriptors mandatory_args; FunctionArgumentDescriptors optional_args{ - {"expr", nullptr, nullptr, "Arbitrary Expression"} + {"expr", nullptr, nullptr, "any type"} }; - validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); + validateFunctionArguments(*this, arguments, mandatory_args, optional_args); return std::make_shared(); } diff --git a/src/Functions/generateUUIDv7.cpp b/src/Functions/generateUUIDv7.cpp index b226c0840f4..a9ed08d9f83 100644 --- a/src/Functions/generateUUIDv7.cpp +++ b/src/Functions/generateUUIDv7.cpp @@ -163,7 +163,7 @@ public: FunctionArgumentDescriptors optional_args{ {"expr", nullptr, nullptr, "Arbitrary expression"} }; - validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); + validateFunctionArguments(*this, arguments, mandatory_args, optional_args); return std::make_shared(); } diff --git a/src/Functions/makeDate.cpp b/src/Functions/makeDate.cpp index 3d8b8617472..41a09793994 100644 --- a/src/Functions/makeDate.cpp +++ b/src/Functions/makeDate.cpp @@ -87,7 +87,7 @@ public: {mandatory_argument_names_year_month_day[1], static_cast(&isNumber), 
nullptr, "Number"}, {mandatory_argument_names_year_month_day[2], static_cast(&isNumber), nullptr, "Number"} }; - validateFunctionArgumentTypes(*this, arguments, args); + validateFunctionArguments(*this, arguments, args); } else { @@ -95,7 +95,7 @@ public: {mandatory_argument_names_year_dayofyear[0], static_cast(&isNumber), nullptr, "Number"}, {mandatory_argument_names_year_dayofyear[1], static_cast(&isNumber), nullptr, "Number"} }; - validateFunctionArgumentTypes(*this, arguments, args); + validateFunctionArguments(*this, arguments, args); } return std::make_shared(); @@ -193,7 +193,7 @@ public: {mandatory_argument_names[0], static_cast(&isNumber), nullptr, "Number"} }; - validateFunctionArgumentTypes(*this, arguments, args); + validateFunctionArguments(*this, arguments, args); return std::make_shared(); } @@ -357,7 +357,7 @@ public: {optional_argument_names[0], static_cast(&isString), isColumnConst, "const String"} }; - validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); + validateFunctionArguments(*this, arguments, mandatory_args, optional_args); /// Optional timezone argument std::string timezone; @@ -440,7 +440,7 @@ public: {optional_argument_names[2], static_cast(&isString), isColumnConst, "const String"} }; - validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); + validateFunctionArguments(*this, arguments, mandatory_args, optional_args); if (arguments.size() >= mandatory_argument_names.size() + 1) { @@ -572,7 +572,7 @@ public: {optional_argument_names[0], static_cast(&isString), isColumnConst, "const String"} }; - validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); + validateFunctionArguments(*this, arguments, mandatory_args, optional_args); /// Optional timezone argument std::string timezone; @@ -652,7 +652,7 @@ public: {optional_argument_names[0], static_cast(&isString), isColumnConst, "const String"} }; - validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); + validateFunctionArguments(*this, arguments, mandatory_args, optional_args); /// Optional precision argument auto precision = DEFAULT_PRECISION; diff --git a/src/Functions/parseDateTime.cpp b/src/Functions/parseDateTime.cpp index 162b8c58873..339eb4cb26c 100644 --- a/src/Functions/parseDateTime.cpp +++ b/src/Functions/parseDateTime.cpp @@ -589,7 +589,7 @@ namespace {"timezone", static_cast(&isString), &isColumnConst, "const String"} }; - validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); + validateFunctionArguments(*this, arguments, mandatory_args, optional_args); String time_zone_name = getTimeZone(arguments).getTimeZone(); DataTypePtr date_type = std::make_shared(time_zone_name); diff --git a/src/Functions/parseReadableSize.cpp b/src/Functions/parseReadableSize.cpp index f5c2c53439b..1abcf7f164f 100644 --- a/src/Functions/parseReadableSize.cpp +++ b/src/Functions/parseReadableSize.cpp @@ -68,7 +68,7 @@ public: { {"readable_size", static_cast(&isString), nullptr, "String"}, }; - validateFunctionArgumentTypes(*this, arguments, args); + validateFunctionArguments(*this, arguments, args); DataTypePtr return_type = std::make_shared(); if constexpr (error_handling == ErrorHandling::Null) return std::make_shared(return_type); diff --git a/src/Functions/regexpExtract.cpp b/src/Functions/regexpExtract.cpp index cfb42580cb0..3cc5393296c 100644 --- a/src/Functions/regexpExtract.cpp +++ b/src/Functions/regexpExtract.cpp @@ -54,7 +54,7 @@ public: if (arguments.size() == 3) 
args.emplace_back(FunctionArgumentDescriptor{"index", static_cast(&isInteger), nullptr, "Integer"}); - validateFunctionArgumentTypes(*this, arguments, args); + validateFunctionArguments(*this, arguments, args); return std::make_shared(); } diff --git a/src/Functions/repeat.cpp b/src/Functions/repeat.cpp index 7f2fe646062..aa90bf2490d 100644 --- a/src/Functions/repeat.cpp +++ b/src/Functions/repeat.cpp @@ -201,7 +201,7 @@ public: {"n", static_cast(&isInteger), nullptr, "Integer"}, }; - validateFunctionArgumentTypes(*this, arguments, args); + validateFunctionArguments(*this, arguments, args); return std::make_shared(); } diff --git a/src/Functions/seriesDecomposeSTL.cpp b/src/Functions/seriesDecomposeSTL.cpp index 618808b64ed..720aa1e0799 100644 --- a/src/Functions/seriesDecomposeSTL.cpp +++ b/src/Functions/seriesDecomposeSTL.cpp @@ -45,7 +45,7 @@ public: {"time_series", static_cast(&isArray), nullptr, "Array"}, {"period", static_cast(&isNativeUInt), nullptr, "Unsigned Integer"}, }; - validateFunctionArgumentTypes(*this, arguments, args); + validateFunctionArguments(*this, arguments, args); return std::make_shared(std::make_shared(std::make_shared())); } diff --git a/src/Functions/seriesOutliersDetectTukey.cpp b/src/Functions/seriesOutliersDetectTukey.cpp index 81fc904e16e..4063d0ab85b 100644 --- a/src/Functions/seriesOutliersDetectTukey.cpp +++ b/src/Functions/seriesOutliersDetectTukey.cpp @@ -51,7 +51,7 @@ public: {"max_percentile", static_cast(&isFloat), isColumnConst, "Number"}, {"k", static_cast(&isNativeNumber), isColumnConst, "Number"}}; - validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); + validateFunctionArguments(*this, arguments, mandatory_args, optional_args); return std::make_shared(std::make_shared()); } diff --git a/src/Functions/snowflake.cpp b/src/Functions/snowflake.cpp index 5ff8a636058..31ea6a28ece 100644 --- a/src/Functions/snowflake.cpp +++ b/src/Functions/snowflake.cpp @@ -64,7 +64,7 @@ public: FunctionArgumentDescriptors args{ {"value", static_cast(&isDateTime), nullptr, "DateTime"} }; - validateFunctionArgumentTypes(*this, arguments, args); + validateFunctionArguments(*this, arguments, args); return std::make_shared(); } @@ -121,7 +121,7 @@ public: FunctionArgumentDescriptors optional_args{ {"time_zone", static_cast(&isString), nullptr, "String"} }; - validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); + validateFunctionArguments(*this, arguments, mandatory_args, optional_args); String timezone; if (arguments.size() == 2) @@ -190,7 +190,7 @@ public: FunctionArgumentDescriptors args{ {"value", static_cast(&isDateTime64), nullptr, "DateTime64"} }; - validateFunctionArgumentTypes(*this, arguments, args); + validateFunctionArguments(*this, arguments, args); return std::make_shared(); } @@ -255,7 +255,7 @@ public: FunctionArgumentDescriptors optional_args{ {"time_zone", static_cast(&isString), nullptr, "String"} }; - validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); + validateFunctionArguments(*this, arguments, mandatory_args, optional_args); String timezone; if (arguments.size() == 2) diff --git a/src/Functions/snowflakeIDToDateTime.cpp b/src/Functions/snowflakeIDToDateTime.cpp index b799792a56f..9a1d5b8a74b 100644 --- a/src/Functions/snowflakeIDToDateTime.cpp +++ b/src/Functions/snowflakeIDToDateTime.cpp @@ -56,7 +56,7 @@ public: {"epoch", static_cast(&isNativeUInt), isColumnConst, "const UInt*"}, {"time_zone", static_cast(&isString), nullptr, "String"} }; - 
validateFunctionArgumentTypes(*this, arguments, args, optional_args); + validateFunctionArguments(*this, arguments, args, optional_args); String timezone; if (arguments.size() == 3) @@ -127,7 +127,7 @@ public: {"epoch", static_cast(&isNativeUInt), isColumnConst, "const UInt*"}, {"time_zone", static_cast(&isString), nullptr, "String"} }; - validateFunctionArgumentTypes(*this, arguments, args, optional_args); + validateFunctionArguments(*this, arguments, args, optional_args); String timezone; if (arguments.size() == 3) diff --git a/src/Functions/space.cpp b/src/Functions/space.cpp index 83183c991bc..ce12f2f541c 100644 --- a/src/Functions/space.cpp +++ b/src/Functions/space.cpp @@ -48,7 +48,7 @@ public: {"n", static_cast(&isInteger), nullptr, "Integer"} }; - validateFunctionArgumentTypes(*this, arguments, args); + validateFunctionArguments(*this, arguments, args); return std::make_shared(); } diff --git a/src/Functions/timestamp.cpp b/src/Functions/timestamp.cpp index fbca08b0968..6f2bd2030d5 100644 --- a/src/Functions/timestamp.cpp +++ b/src/Functions/timestamp.cpp @@ -46,7 +46,7 @@ public: FunctionArgumentDescriptors optional_args{ {"time", static_cast(&isString), nullptr, "String"} }; - validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); + validateFunctionArguments(*this, arguments, mandatory_args, optional_args); return std::make_shared(DATETIME_SCALE); } diff --git a/src/Functions/toDecimalString.cpp b/src/Functions/toDecimalString.cpp index fc621b272de..4ee664ad237 100644 --- a/src/Functions/toDecimalString.cpp +++ b/src/Functions/toDecimalString.cpp @@ -43,7 +43,7 @@ public: {"precision", static_cast(&isNativeInteger), &isColumnConst, "const Integer"} }; - validateFunctionArgumentTypes(*this, arguments, mandatory_args, {}); + validateFunctionArguments(*this, arguments, mandatory_args, {}); return std::make_shared(); } From 1821638d5e3e2ee8fbea278c67e6d757f03c4253 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 2 Jul 2024 16:38:00 +0000 Subject: [PATCH 116/140] Replace validateArgumentType() by validateFunctionArguments() --- src/Functions/FunctionHelpers.cpp | 14 -------------- src/Functions/FunctionHelpers.h | 6 ------ src/Functions/geohashDecode.cpp | 7 +++++-- src/Functions/geohashEncode.cpp | 22 +++++++++------------- src/Functions/geohashesInBox.cpp | 25 ++++++++++++++----------- 5 files changed, 28 insertions(+), 46 deletions(-) diff --git a/src/Functions/FunctionHelpers.cpp b/src/Functions/FunctionHelpers.cpp index 0027f9f281f..236afc5ecbf 100644 --- a/src/Functions/FunctionHelpers.cpp +++ b/src/Functions/FunctionHelpers.cpp @@ -95,20 +95,6 @@ ColumnsWithTypeAndName createBlockWithNestedColumns(const ColumnsWithTypeAndName return res; } -void validateArgumentType(const IFunction & func, const DataTypes & arguments, - size_t argument_index, bool (* validator_func)(const IDataType &), - const char * type_name) -{ - if (arguments.size() <= argument_index) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Incorrect number of arguments of function {}", - func.getName()); - - const auto & argument = arguments[argument_index]; - if (!validator_func(*argument)) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of {} argument of function {}, expected {}", - argument->getName(), argument_index, func.getName(), type_name); -} - namespace { void validateArgumentsImpl(const IFunction & func, diff --git a/src/Functions/FunctionHelpers.h b/src/Functions/FunctionHelpers.h index c08eb5265c1..4f93b236bcb 100644 --- 
a/src/Functions/FunctionHelpers.h +++ b/src/Functions/FunctionHelpers.h @@ -115,12 +115,6 @@ ColumnWithTypeAndName columnGetNested(const ColumnWithTypeAndName & col); /// column if it is nullable. ColumnsWithTypeAndName createBlockWithNestedColumns(const ColumnsWithTypeAndName & columns); -/// Checks argument type at specified index with predicate. -/// throws if there is no argument at specified index or if predicate returns false. -void validateArgumentType(const IFunction & func, const DataTypes & arguments, - size_t argument_index, bool (* validator_func)(const IDataType &), - const char * type_name); - /// Expected arguments for a function. Can be used in conjunction with validateFunctionArguments() to check that the user-provided /// arguments match the expected arguments. struct FunctionArgumentDescriptor diff --git a/src/Functions/geohashDecode.cpp b/src/Functions/geohashDecode.cpp index b2454f5dffc..96ad7dacfc4 100644 --- a/src/Functions/geohashDecode.cpp +++ b/src/Functions/geohashDecode.cpp @@ -38,9 +38,12 @@ public: bool useDefaultImplementationForConstants() const override { return true; } bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } - DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { - validateArgumentType(*this, arguments, 0, isStringOrFixedString, "string or fixed string"); + FunctionArgumentDescriptors args{ + {"encoded", static_cast(&isStringOrFixedString), nullptr, "String or FixedString"} + }; + validateFunctionArguments(*this, arguments, args); return std::make_shared( DataTypes{std::make_shared(), std::make_shared()}, diff --git a/src/Functions/geohashEncode.cpp b/src/Functions/geohashEncode.cpp index 7c353b822aa..034c8188b63 100644 --- a/src/Functions/geohashEncode.cpp +++ b/src/Functions/geohashEncode.cpp @@ -17,7 +17,6 @@ namespace DB namespace ErrorCodes { extern const int LOGICAL_ERROR; - extern const int TOO_MANY_ARGUMENTS_FOR_FUNCTION; } namespace @@ -40,19 +39,16 @@ public: bool useDefaultImplementationForConstants() const override { return true; } bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } - DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { - validateArgumentType(*this, arguments, 0, isFloat, "float"); - validateArgumentType(*this, arguments, 1, isFloat, "float"); - if (arguments.size() == 3) - { - validateArgumentType(*this, arguments, 2, isInteger, "integer"); - } - if (arguments.size() > 3) - { - throw Exception(ErrorCodes::TOO_MANY_ARGUMENTS_FOR_FUNCTION, "Too many arguments for function {} expected at most 3", - getName()); - } + FunctionArgumentDescriptors mandatory_args{ + {"longitude", static_cast(&isFloat), nullptr, "Float*"}, + {"latitude", static_cast(&isFloat), nullptr, "Float*"} + }; + FunctionArgumentDescriptors optional_args{ + {"precision", static_cast(&isInteger), nullptr, "(U)Int*"} + }; + validateFunctionArguments(*this, arguments, mandatory_args, optional_args); return std::make_shared(); } diff --git a/src/Functions/geohashesInBox.cpp b/src/Functions/geohashesInBox.cpp index ac8d4a6ad8f..9429903dda7 100644 --- a/src/Functions/geohashesInBox.cpp +++ b/src/Functions/geohashesInBox.cpp @@ -35,22 +35,25 @@ public: size_t getNumberOfArguments() const 
override { return 5; } - DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { - validateArgumentType(*this, arguments, 0, isFloat, "float"); - validateArgumentType(*this, arguments, 1, isFloat, "float"); - validateArgumentType(*this, arguments, 2, isFloat, "float"); - validateArgumentType(*this, arguments, 3, isFloat, "float"); - validateArgumentType(*this, arguments, 4, isUInt8, "integer"); + FunctionArgumentDescriptors args{ + {"longitude_min", static_cast(&isFloat), nullptr, "Float*"}, + {"latitude_min", static_cast(&isFloat), nullptr, "Float*"}, + {"longitude_max", static_cast(&isFloat), nullptr, "Float*"}, + {"latitude_max", static_cast(&isFloat), nullptr, "Float*"}, + {"precision", static_cast(&isUInt8), nullptr, "UInt8"} + }; + validateFunctionArguments(*this, arguments, args); - if (!(arguments[0]->equals(*arguments[1]) && - arguments[0]->equals(*arguments[2]) && - arguments[0]->equals(*arguments[3]))) + if (!(arguments[0].type->equals(*arguments[1].type) && + arguments[0].type->equals(*arguments[2].type) && + arguments[0].type->equals(*arguments[3].type))) { throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type of argument of {} all coordinate arguments must have the same type, " - "instead they are:{}, {}, {}, {}.", getName(), arguments[0]->getName(), - arguments[1]->getName(), arguments[2]->getName(), arguments[3]->getName()); + "instead they are: {}, {}, {}, {}.", getName(), arguments[0].type->getName(), + arguments[1].type->getName(), arguments[2].type->getName(), arguments[3].type->getName()); } return std::make_shared(std::make_shared()); From 659020dc8695e974241723f1fa49fc66bcc1c478 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 2 Jul 2024 17:42:33 +0000 Subject: [PATCH 117/140] More aesthetic error messages --- src/Functions/FunctionHelpers.cpp | 60 +++++++++++++++---------------- 1 file changed, 29 insertions(+), 31 deletions(-) diff --git a/src/Functions/FunctionHelpers.cpp b/src/Functions/FunctionHelpers.cpp index 236afc5ecbf..b30f38d3d76 100644 --- a/src/Functions/FunctionHelpers.cpp +++ b/src/Functions/FunctionHelpers.cpp @@ -97,6 +97,19 @@ ColumnsWithTypeAndName createBlockWithNestedColumns(const ColumnsWithTypeAndName namespace { + +String withOrdinalEnding(size_t i) +{ + switch (i) + { + case 0: return "1st"; + case 1: return "2nd"; + case 2: return "3rd"; + default: return std::to_string(i) + "th"; + } + +} + void validateArgumentsImpl(const IFunction & func, const ColumnsWithTypeAndName & arguments, size_t argument_offset, @@ -112,12 +125,12 @@ void validateArgumentsImpl(const IFunction & func, const auto & descriptor = descriptors[i]; if (int error_code = descriptor.isValid(arg.type, arg.column); error_code != 0) throw Exception(error_code, - "Illegal type of argument #{}{} of function {}{}{}", - argument_offset + i + 1, // +1 is for human-friendly 1-based indexing - " '" + String(descriptor.name) + "'", + "A value of illegal type was provided as {} argument '{}' to function '{}'. Expected: {}, got: {}", + withOrdinalEnding(argument_offset + i), + descriptor.name, func.getName(), - String(", expected ") + String(descriptor.type_name), - arg.type ? ", got " + arg.type->getName() : String{}); + descriptor.type_name, + arg.type ? 
arg.type->getName() : ""); } } @@ -144,34 +157,19 @@ void validateFunctionArguments(const IFunction & func, { if (arguments.size() < mandatory_args.size() || arguments.size() > mandatory_args.size() + optional_args.size()) { - auto join_argument_types = [](const auto & args, const String sep = ", ") - { - String result; - for (const auto & a : args) - { - using A = std::decay_t; - if constexpr (std::is_same_v) - { - result += "'" + String(a.name) + "' : "; - result += a.type_name; - } - else if constexpr (std::is_same_v) - result += a.type->getName(); - - result += sep; - } - - if (!args.empty()) - result.erase(result.end() - sep.length(), result.end()); - - return result; - }; + auto argument_singular_or_plural = [](const auto & args){ return fmt::format("argument{}", args.size() != 1 ? "s" : ""); }; throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, - "Incorrect number of arguments for function {} provided {}{}, expected {}{} ({}{})", - func.getName(), arguments.size(), (!arguments.empty() ? " (" + join_argument_types(arguments) + ")" : String{}), - mandatory_args.size(), (!optional_args.empty() ? " to " + std::to_string(mandatory_args.size() + optional_args.size()) : ""), - join_argument_types(mandatory_args), (!optional_args.empty() ? ", [" + join_argument_types(optional_args) + "]" : "")); + "An incorrect number of arguments was specified for function '{}'. Expected {}, got {}", + func.getName(), + (!mandatory_args.empty() && !optional_args.empty()) + ? fmt::format("{} mandatory {} and {} optional {}", mandatory_args.size(), argument_singular_or_plural(mandatory_args), optional_args.size(), argument_singular_or_plural(optional_args)) + : (!mandatory_args.empty() && optional_args.empty()) + ? fmt::format("{} {}", mandatory_args.size(), argument_singular_or_plural(mandatory_args)) /// intentionally not "_mandatory_ arguments" + : (mandatory_args.empty() && !optional_args.empty()) + ? 
fmt::format("{} optional {}", optional_args.size(), argument_singular_or_plural(optional_args)) + : "0 arguments", + fmt::format("{} {}", arguments.size(), argument_singular_or_plural(arguments))); } validateArgumentsImpl(func, arguments, 0, mandatory_args); From 073471530b1e6bc8f08b959b4071cd0a376f24e1 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Tue, 2 Jul 2024 20:30:46 +0200 Subject: [PATCH 118/140] fix test --- tests/queries/0_stateless/01158_zookeeper_log_long.sql | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/01158_zookeeper_log_long.sql b/tests/queries/0_stateless/01158_zookeeper_log_long.sql index 55d4162fc48..804cdf48fb6 100644 --- a/tests/queries/0_stateless/01158_zookeeper_log_long.sql +++ b/tests/queries/0_stateless/01158_zookeeper_log_long.sql @@ -29,14 +29,20 @@ select 'parts'; select type, has_watch, op_num, replace(path, toString(serverUUID()), ''), is_ephemeral, is_sequential, if(startsWith(path, '/clickhouse/sessions'), 1, version), requests_size, request_idx, error, watch_type, watch_state, path_created, stat_version, stat_cversion, stat_dataLength, stat_numChildren from system.zookeeper_log -where (session_id, xid) in (select session_id, xid from system.zookeeper_log where path='/test/01158/' || currentDatabase() || '/rmt/replicas/1/parts/all_0_0_0') +where (session_id, xid) in ( + select session_id, xid from system.zookeeper_log where path='/test/01158/' || currentDatabase() || '/rmt/replicas/1/parts/all_0_0_0' + and (query_id='' or query_id in (select query_id from system.query_log where current_database=currentDatabase() and event_date>=yesterday())) +) order by xid, type, request_idx; select 'blocks'; select type, has_watch, op_num, path, is_ephemeral, is_sequential, version, requests_size, request_idx, error, watch_type, watch_state, path_created, stat_version, stat_cversion, stat_dataLength, stat_numChildren from system.zookeeper_log -where (session_id, xid) in (select session_id, xid from system.zookeeper_log where path like '/test/01158/' || currentDatabase() || '/rmt/blocks/%' and op_num not in (1, 12, 500)) +where (session_id, xid) in ( + select session_id, xid from system.zookeeper_log where path like '/test/01158/' || currentDatabase() || '/rmt/blocks/%' and op_num not in (1, 12, 500) + and (query_id='' or query_id in (select query_id from system.query_log where current_database=currentDatabase() and event_date>=yesterday())) +) order by xid, type, request_idx; drop table rmt sync; From 0afccecd6b4048414d672918f5351fe80abd4548 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 2 Jul 2024 19:05:23 +0000 Subject: [PATCH 119/140] Fix build --- src/Functions/FunctionBase64Conversion.h | 2 +- src/Functions/seriesPeriodDetectFFT.cpp | 2 +- src/Functions/sqid.cpp | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Functions/FunctionBase64Conversion.h b/src/Functions/FunctionBase64Conversion.h index 083179c3ca8..363b9ee3a31 100644 --- a/src/Functions/FunctionBase64Conversion.h +++ b/src/Functions/FunctionBase64Conversion.h @@ -202,7 +202,7 @@ public: {"value", static_cast(&isStringOrFixedString), nullptr, "String or FixedString"} }; - validateFunctionArgumentTypes(*this, arguments, mandatory_arguments); + validateFunctionArguments(*this, arguments, mandatory_arguments); return std::make_shared(); } diff --git a/src/Functions/seriesPeriodDetectFFT.cpp b/src/Functions/seriesPeriodDetectFFT.cpp index e85b3a97c67..471354235d5 100644 --- a/src/Functions/seriesPeriodDetectFFT.cpp 
+++ b/src/Functions/seriesPeriodDetectFFT.cpp @@ -53,7 +53,7 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { FunctionArgumentDescriptors args{{"time_series", static_cast(&isArray), nullptr, "Array"}}; - validateFunctionArgumentTypes(*this, arguments, args); + validateFunctionArguments(*this, arguments, args); return std::make_shared(); } diff --git a/src/Functions/sqid.cpp b/src/Functions/sqid.cpp index 6679646fef4..0e133590b84 100644 --- a/src/Functions/sqid.cpp +++ b/src/Functions/sqid.cpp @@ -100,7 +100,7 @@ public: FunctionArgumentDescriptors args{ {"sqid", static_cast(&isString), nullptr, "String"} }; - validateFunctionArgumentTypes(*this, arguments, args); + validateFunctionArguments(*this, arguments, args); return std::make_shared(std::make_shared()); } From 54c4f02dca9fc0d33d717c7fb4122834ac214ae9 Mon Sep 17 00:00:00 2001 From: Justin de Guzman Date: Tue, 2 Jul 2024 15:22:48 -0700 Subject: [PATCH 120/140] [Docs] Better wording for behavior of MATERIALIZED expr --- docs/en/sql-reference/statements/create/table.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/statements/create/table.md b/docs/en/sql-reference/statements/create/table.md index 0253bc647e6..b866d0b9f5f 100644 --- a/docs/en/sql-reference/statements/create/table.md +++ b/docs/en/sql-reference/statements/create/table.md @@ -152,7 +152,7 @@ SELECT * FROM test; `MATERIALIZED expr` -Materialized expression. Values of such columns are always calculated, they cannot be specified in INSERT queries. +Materialized expression. Values of such columns are automatically calculated according to the specified materialized expression when rows are inserted. Values cannot be explicitly specified during `INSERT`s. Also, default value columns of this type are not included in the result of `SELECT *`. This is to preserve the invariant that the result of a `SELECT *` can always be inserted back into the table using `INSERT`. This behavior can be disabled with setting `asterisk_include_materialized_columns`. From 4680489c374248684feb0ea6ad78fcea4e81d7b0 Mon Sep 17 00:00:00 2001 From: Peignon Melvyn Date: Wed, 3 Jul 2024 00:31:50 +0200 Subject: [PATCH 121/140] Improve messaging around the JSON object datatype - Changed the title - Improve messaging around the future of this feature --- .../data-types/{json.md => object-data-type.md} | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) rename docs/en/sql-reference/data-types/{json.md => object-data-type.md} (79%) diff --git a/docs/en/sql-reference/data-types/json.md b/docs/en/sql-reference/data-types/object-data-type.md similarity index 79% rename from docs/en/sql-reference/data-types/json.md rename to docs/en/sql-reference/data-types/object-data-type.md index 39e37abad82..0a3f780569f 100644 --- a/docs/en/sql-reference/data-types/json.md +++ b/docs/en/sql-reference/data-types/object-data-type.md @@ -1,24 +1,19 @@ --- -slug: /en/sql-reference/data-types/json +slug: /en/sql-reference/data-types/object-data-type sidebar_position: 26 -sidebar_label: JSON +sidebar_label: Object Data Type --- -# JSON +# Object Data Type :::note -This feature is experimental and is not production-ready. If you need to work with JSON documents, consider using [this guide](/docs/en/integrations/data-ingestion/data-formats/json.md) instead. +This feature is not production-ready and is now deprecated. 
If you need to work with JSON documents, consider using [this guide](/docs/en/integrations/data-ingestion/data-formats/json.md) instead. A new implementation to support JSON object is in progress and can be tracked [here](https://github.com/ClickHouse/ClickHouse/issues/54864) ::: Stores JavaScript Object Notation (JSON) documents in a single column. `JSON` is an alias for `Object('json')`. -:::note -The JSON data type is an obsolete feature. Do not use it. -If you want to use it, set `allow_experimental_object_type = 1`. -::: - ## Example **Example 1** From e06d1d81603e10453ef42e38e14bb33bdd6939e5 Mon Sep 17 00:00:00 2001 From: Justin de Guzman Date: Tue, 2 Jul 2024 17:49:42 -0700 Subject: [PATCH 122/140] Formatting --- docs/en/sql-reference/data-types/object-data-type.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/docs/en/sql-reference/data-types/object-data-type.md b/docs/en/sql-reference/data-types/object-data-type.md index 0a3f780569f..cb9c0810c84 100644 --- a/docs/en/sql-reference/data-types/object-data-type.md +++ b/docs/en/sql-reference/data-types/object-data-type.md @@ -2,6 +2,7 @@ slug: /en/sql-reference/data-types/object-data-type sidebar_position: 26 sidebar_label: Object Data Type +keywords: [object, data type] --- # Object Data Type @@ -44,7 +45,7 @@ SELECT o.a, o.b.c, o.b.d[3] FROM json **Example 2** -To be able to create an ordered `MergeTree` family table the sorting key has to be extracted into its column. For example, to insert a file of compressed HTTP access logs in JSON format: +To be able to create an ordered `MergeTree` family table, the sorting key has to be extracted into its column. For example, to insert a file of compressed HTTP access logs in JSON format: ```sql CREATE TABLE logs @@ -64,7 +65,7 @@ FROM file('access.json.gz', JSONAsString) ## Displaying JSON columns -When displaying a `JSON` column ClickHouse only shows the field values by default (because internally, it is represented as a tuple). You can display the field names as well by setting `output_format_json_named_tuples_as_objects = 1`: +When displaying a `JSON` column, ClickHouse only shows the field values by default (because internally, it is represented as a tuple). You can also display the field names by setting `output_format_json_named_tuples_as_objects = 1`: ```sql SET output_format_json_named_tuples_as_objects = 1 @@ -78,4 +79,5 @@ SELECT * FROM json FORMAT JSONEachRow ## Related Content +- [Using JSON in ClickHouse](/docs/en/integrations/data-formats/json) - [Getting Data Into ClickHouse - Part 2 - A JSON detour](https://clickhouse.com/blog/getting-data-into-clickhouse-part-2-json) From ca49cbafd96cfd0972a859c369f98265a84959f3 Mon Sep 17 00:00:00 2001 From: Justin de Guzman Date: Tue, 2 Jul 2024 17:50:06 -0700 Subject: [PATCH 123/140] Fix link --- docs/en/sql-reference/data-types/object-data-type.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/data-types/object-data-type.md b/docs/en/sql-reference/data-types/object-data-type.md index cb9c0810c84..c29be2cff58 100644 --- a/docs/en/sql-reference/data-types/object-data-type.md +++ b/docs/en/sql-reference/data-types/object-data-type.md @@ -8,7 +8,7 @@ keywords: [object, data type] # Object Data Type :::note -This feature is not production-ready and is now deprecated. If you need to work with JSON documents, consider using [this guide](/docs/en/integrations/data-ingestion/data-formats/json.md) instead. 
A new implementation to support JSON object is in progress and can be tracked [here](https://github.com/ClickHouse/ClickHouse/issues/54864) +This feature is not production-ready and is now deprecated. If you need to work with JSON documents, consider using [this guide](/docs/en/integrations/data-ingestion/data-formats/json) instead. A new implementation to support JSON object is in progress and can be tracked [here](https://github.com/ClickHouse/ClickHouse/issues/54864) ::: Stores JavaScript Object Notation (JSON) documents in a single column. From 67aca82d9e3565463588b942cf73581303dd47f2 Mon Sep 17 00:00:00 2001 From: Justin de Guzman Date: Tue, 2 Jul 2024 18:09:40 -0700 Subject: [PATCH 124/140] Revert file name change (changing slug is sufficient) --- docs/en/sql-reference/data-types/{object-data-type.md => json.md} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename docs/en/sql-reference/data-types/{object-data-type.md => json.md} (100%) diff --git a/docs/en/sql-reference/data-types/object-data-type.md b/docs/en/sql-reference/data-types/json.md similarity index 100% rename from docs/en/sql-reference/data-types/object-data-type.md rename to docs/en/sql-reference/data-types/json.md From 6079373ce3ef1107bc7ea634c6d1e1ceac24744d Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Wed, 3 Jul 2024 08:36:16 +0000 Subject: [PATCH 125/140] Incorporate review feedback --- src/Functions/FunctionHelpers.cpp | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/src/Functions/FunctionHelpers.cpp b/src/Functions/FunctionHelpers.cpp index b30f38d3d76..c658063b66f 100644 --- a/src/Functions/FunctionHelpers.cpp +++ b/src/Functions/FunctionHelpers.cpp @@ -157,18 +157,22 @@ void validateFunctionArguments(const IFunction & func, { if (arguments.size() < mandatory_args.size() || arguments.size() > mandatory_args.size() + optional_args.size()) { - auto argument_singular_or_plural = [](const auto & args){ return fmt::format("argument{}", args.size() != 1 ? "s" : ""); }; + auto argument_singular_or_plural = [](const auto & args) -> std::string_view { return args.size() == 1 ? "argument" : "arguments"; }; + + String expected_args_string; + if (!mandatory_args.empty() && !optional_args.empty()) + expected_args_string = fmt::format("{} mandatory {} and {} optional {}", mandatory_args.size(), argument_singular_or_plural(mandatory_args), optional_args.size(), argument_singular_or_plural(optional_args)); + else if (!mandatory_args.empty() && optional_args.empty()) + expected_args_string = fmt::format("{} {}", mandatory_args.size(), argument_singular_or_plural(mandatory_args)); /// intentionally not "_mandatory_ arguments" + else if (mandatory_args.empty() && !optional_args.empty()) + expected_args_string = fmt::format("{} optional {}", optional_args.size(), argument_singular_or_plural(optional_args)); + else + expected_args_string = "0 arguments"; throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "An incorrect number of arguments was specified for function '{}'. Expected {}, got {}", func.getName(), - (!mandatory_args.empty() && !optional_args.empty()) - ? fmt::format("{} mandatory {} and {} optional {}", mandatory_args.size(), argument_singular_or_plural(mandatory_args), optional_args.size(), argument_singular_or_plural(optional_args)) - : (!mandatory_args.empty() && optional_args.empty()) - ? 
fmt::format("{} {}", mandatory_args.size(), argument_singular_or_plural(mandatory_args)) /// intentionally not "_mandatory_ arguments" - : (mandatory_args.empty() && !optional_args.empty()) - ? fmt::format("{} optional {}", optional_args.size(), argument_singular_or_plural(optional_args)) - : "0 arguments", + expected_args_string, fmt::format("{} {}", arguments.size(), argument_singular_or_plural(arguments))); } From 1f309ef342360ba2207a2ed1e7eb87c0eaa9cfde Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 3 Jul 2024 11:03:32 +0200 Subject: [PATCH 126/140] Bump From c86cdbb243c9093e3eb59134de08beb42f1d4c02 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Wed, 3 Jul 2024 11:05:34 +0200 Subject: [PATCH 127/140] Remove scary jemalloc log --- programs/server/Server.cpp | 28 ---------------------------- 1 file changed, 28 deletions(-) diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 4cb3b5f45c7..f992fdc13a9 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -133,10 +133,6 @@ # include #endif -#if USE_JEMALLOC -# include -#endif - #if USE_AZURE_BLOB_STORAGE # include # include @@ -176,34 +172,10 @@ namespace ProfileEvents namespace fs = std::filesystem; -#if USE_JEMALLOC -static bool jemallocOptionEnabled(const char *name) -{ - bool value; - size_t size = sizeof(value); - - if (mallctl(name, reinterpret_cast(&value), &size, /* newp= */ nullptr, /* newlen= */ 0)) - throw Poco::SystemException("mallctl() failed"); - - return value; -} -#else -static bool jemallocOptionEnabled(const char *) { return false; } -#endif - int mainEntryClickHouseServer(int argc, char ** argv) { DB::Server app; - if (jemallocOptionEnabled("opt.background_thread")) - { - LOG_ERROR(&app.logger(), - "jemalloc.background_thread was requested, " - "however ClickHouse uses percpu_arena and background_thread most likely will not give any benefits, " - "and also background_thread is not compatible with ClickHouse watchdog " - "(that can be disabled with CLICKHOUSE_WATCHDOG_ENABLE=0)"); - } - /// Do not fork separate process from watchdog if we attached to terminal. /// Otherwise it breaks gdb usage. /// Can be overridden by environment variable (cannot use server config at this moment). From 198b80b6a252ef25f8b4f269a53c39ec5ae0e76f Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Thu, 27 Jun 2024 16:36:43 +0000 Subject: [PATCH 128/140] Cosmetics No code was harmed in the process (really just cosmetics). 
--- src/Storages/Statistics/Statistics.cpp | 40 +++++++++---------- src/Storages/Statistics/Statistics.h | 28 +++++-------- src/Storages/Statistics/TDigestStatistics.cpp | 34 ++++++++-------- src/Storages/Statistics/TDigestStatistics.h | 8 +--- src/Storages/Statistics/UniqStatistics.cpp | 2 +- src/Storages/Statistics/UniqStatistics.h | 3 +- 6 files changed, 48 insertions(+), 67 deletions(-) diff --git a/src/Storages/Statistics/Statistics.cpp b/src/Storages/Statistics/Statistics.cpp index fed0bd61c03..a4c57c9eef4 100644 --- a/src/Storages/Statistics/Statistics.cpp +++ b/src/Storages/Statistics/Statistics.cpp @@ -1,6 +1,3 @@ -#include -#include - #include #include #include @@ -10,6 +7,8 @@ #include #include #include +#include + namespace DB { @@ -20,7 +19,6 @@ namespace ErrorCodes extern const int INCORRECT_QUERY; } -/// Version / bitmask of statistics / data of statistics / enum StatisticsFileVersion : UInt16 { V0 = 0, @@ -29,17 +27,15 @@ enum StatisticsFileVersion : UInt16 IStatistics::IStatistics(const SingleStatisticsDescription & stat_) : stat(stat_) {} ColumnStatistics::ColumnStatistics(const ColumnStatisticsDescription & stats_desc_) - : stats_desc(stats_desc_), rows(0) + : stats_desc(stats_desc_) { } void ColumnStatistics::update(const ColumnPtr & column) { rows += column->size(); - for (const auto & iter : stats) - { - iter.second->update(column); - } + for (const auto & stat : stats) + stat.second->update(column); } Float64 ColumnStatistics::estimateLess(Float64 val) const @@ -76,14 +72,17 @@ Float64 ColumnStatistics::estimateEqual(Float64 val) const void ColumnStatistics::serialize(WriteBuffer & buf) { writeIntBinary(V0, buf); + UInt64 stat_types_mask = 0; for (const auto & [type, _]: stats) stat_types_mask |= 1 << UInt8(type); writeIntBinary(stat_types_mask, buf); - /// We write some basic statistics + + /// store the column row count as it is always useful writeIntBinary(rows, buf); - /// We write complex statistics - for (const auto & [type, stat_ptr]: stats) + + /// write the actual statistics object + for (const auto & [type, stat_ptr] : stats) stat_ptr->serialize(buf); } @@ -96,7 +95,9 @@ void ColumnStatistics::deserialize(ReadBuffer &buf) UInt64 stat_types_mask = 0; readIntBinary(stat_types_mask, buf); + readIntBinary(rows, buf); + for (auto it = stats.begin(); it != stats.end();) { if (!(stat_types_mask & 1 << UInt8(it->first))) @@ -136,15 +137,15 @@ void MergeTreeStatisticsFactory::registerValidator(StatisticsType stats_type, Va { if (!validators.emplace(stats_type, std::move(validator)).second) throw Exception(ErrorCodes::LOGICAL_ERROR, "MergeTreeStatisticsFactory: the statistics validator type {} is not unique", stats_type); - } MergeTreeStatisticsFactory::MergeTreeStatisticsFactory() { - registerCreator(StatisticsType::TDigest, TDigestCreator); - registerCreator(StatisticsType::Uniq, UniqCreator); registerValidator(StatisticsType::TDigest, TDigestValidator); + registerCreator(StatisticsType::TDigest, TDigestCreator); + registerValidator(StatisticsType::Uniq, UniqValidator); + registerCreator(StatisticsType::Uniq, UniqCreator); } MergeTreeStatisticsFactory & MergeTreeStatisticsFactory::instance() @@ -159,9 +160,7 @@ void MergeTreeStatisticsFactory::validate(const ColumnStatisticsDescription & st { auto it = validators.find(type); if (it == validators.end()) - { - throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown Statistic type '{}'", type); - } + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown statistic type '{}'", type); it->second(desc, data_type); } } @@ 
-173,10 +172,7 @@ ColumnStatisticsPtr MergeTreeStatisticsFactory::get(const ColumnStatisticsDescri { auto it = creators.find(type); if (it == creators.end()) - { - throw Exception(ErrorCodes::INCORRECT_QUERY, - "Unknown Statistic type '{}'. Available types: tdigest, uniq", type); - } + throw Exception(ErrorCodes::INCORRECT_QUERY, "Unknown statistic type '{}'. Available types: 'tdigest', 'uniq'", type); auto stat_ptr = (it->second)(desc, stats.data_type); column_stat->stats[type] = stat_ptr; } diff --git a/src/Storages/Statistics/Statistics.h b/src/Storages/Statistics/Statistics.h index 2ab1337af02..5e756e48d42 100644 --- a/src/Storages/Statistics/Statistics.h +++ b/src/Storages/Statistics/Statistics.h @@ -1,19 +1,15 @@ #pragma once -#include -#include - #include -#include #include #include #include +#include namespace DB { -/// this is for user-defined statistic. constexpr auto STATS_FILE_PREFIX = "statistics_"; constexpr auto STATS_FILE_SUFFIX = ".stats"; @@ -25,11 +21,9 @@ class IStatistics { public: explicit IStatistics(const SingleStatisticsDescription & stat_); - virtual ~IStatistics() = default; virtual void serialize(WriteBuffer & buf) = 0; - virtual void deserialize(ReadBuffer & buf) = 0; virtual void update(const ColumnPtr & column) = 0; @@ -43,11 +37,12 @@ using StatisticsPtr = std::shared_ptr; class ColumnStatistics { public: - explicit ColumnStatistics(const ColumnStatisticsDescription & stats_); + explicit ColumnStatistics(const ColumnStatisticsDescription & stats_desc_); + void serialize(WriteBuffer & buf); void deserialize(ReadBuffer & buf); - String getFileName() const; + String getFileName() const; const String & columnName() const; UInt64 rowCount() const; @@ -55,17 +50,14 @@ public: void update(const ColumnPtr & column); Float64 estimateLess(Float64 val) const; - Float64 estimateGreater(Float64 val) const; - Float64 estimateEqual(Float64 val) const; private: - friend class MergeTreeStatisticsFactory; ColumnStatisticsDescription stats_desc; std::map stats; - UInt64 rows; /// the number of rows of the column + UInt64 rows = 0; /// the number of rows in the column }; class ColumnsDescription; @@ -79,25 +71,23 @@ public: void validate(const ColumnStatisticsDescription & stats, DataTypePtr data_type) const; + using Validator = std::function; using Creator = std::function; - using Validator = std::function; - ColumnStatisticsPtr get(const ColumnStatisticsDescription & stats) const; - ColumnsStatistics getMany(const ColumnsDescription & columns) const; - void registerCreator(StatisticsType type, Creator creator); void registerValidator(StatisticsType type, Validator validator); + void registerCreator(StatisticsType type, Creator creator); protected: MergeTreeStatisticsFactory(); private: - using Creators = std::unordered_map; using Validators = std::unordered_map; - Creators creators; + using Creators = std::unordered_map; Validators validators; + Creators creators; }; } diff --git a/src/Storages/Statistics/TDigestStatistics.cpp b/src/Storages/Statistics/TDigestStatistics.cpp index aa5662c979d..2f254b604e4 100644 --- a/src/Storages/Statistics/TDigestStatistics.cpp +++ b/src/Storages/Statistics/TDigestStatistics.cpp @@ -8,53 +8,53 @@ namespace ErrorCodes extern const int ILLEGAL_STATISTICS; } -TDigestStatistics::TDigestStatistics(const SingleStatisticsDescription & stat_): - IStatistics(stat_) +TDigestStatistics::TDigestStatistics(const SingleStatisticsDescription & stat_) + : IStatistics(stat_) { } Float64 TDigestStatistics::estimateLess(Float64 val) const { - return 
data.getCountLessThan(val); + return t_digest.getCountLessThan(val); } Float64 TDigestStatistics::estimateEqual(Float64 val) const { - return data.getCountEqual(val); + return t_digest.getCountEqual(val); } void TDigestStatistics::serialize(WriteBuffer & buf) { - data.serialize(buf); + t_digest.serialize(buf); } void TDigestStatistics::deserialize(ReadBuffer & buf) { - data.deserialize(buf); + t_digest.deserialize(buf); } void TDigestStatistics::update(const ColumnPtr & column) { - size_t size = column->size(); + size_t rows = column->size(); - for (size_t i = 0; i < size; ++i) + for (size_t row = 0; row < rows; ++row) { /// TODO: support more types. - Float64 value = column->getFloat64(i); - data.add(value, 1); + Float64 value = column->getFloat64(row); + t_digest.add(value, 1); } } +void TDigestValidator(const SingleStatisticsDescription &, DataTypePtr data_type) +{ + data_type = removeNullable(data_type); + if (!data_type->isValueRepresentedByNumber()) + throw Exception(ErrorCodes::ILLEGAL_STATISTICS, "Statistics of type 'tdigest' do not support type {}", data_type->getName()); +} + StatisticsPtr TDigestCreator(const SingleStatisticsDescription & stat, DataTypePtr) { return std::make_shared(stat); } -void TDigestValidator(const SingleStatisticsDescription &, DataTypePtr data_type) -{ - data_type = removeNullable(data_type); - if (!data_type->isValueRepresentedByNumber()) - throw Exception(ErrorCodes::ILLEGAL_STATISTICS, "Statistics of type 'tdigest' does not support type {}", data_type->getName()); -} - } diff --git a/src/Storages/Statistics/TDigestStatistics.h b/src/Storages/Statistics/TDigestStatistics.h index 7c361b8751f..2e29becc5ee 100644 --- a/src/Storages/Statistics/TDigestStatistics.h +++ b/src/Storages/Statistics/TDigestStatistics.h @@ -6,27 +6,23 @@ namespace DB { - -/// TDigestStatistic is a kind of histogram. 
class TDigestStatistics : public IStatistics { public: explicit TDigestStatistics(const SingleStatisticsDescription & stat_); Float64 estimateLess(Float64 val) const; - Float64 estimateEqual(Float64 val) const; void serialize(WriteBuffer & buf) override; - void deserialize(ReadBuffer & buf) override; void update(const ColumnPtr & column) override; private: - QuantileTDigest data; + QuantileTDigest t_digest; }; -StatisticsPtr TDigestCreator(const SingleStatisticsDescription & stat, DataTypePtr); void TDigestValidator(const SingleStatisticsDescription &, DataTypePtr data_type); +StatisticsPtr TDigestCreator(const SingleStatisticsDescription & stat, DataTypePtr); } diff --git a/src/Storages/Statistics/UniqStatistics.cpp b/src/Storages/Statistics/UniqStatistics.cpp index fc748e769ca..2f7a75db504 100644 --- a/src/Storages/Statistics/UniqStatistics.cpp +++ b/src/Storages/Statistics/UniqStatistics.cpp @@ -55,7 +55,7 @@ void UniqValidator(const SingleStatisticsDescription &, DataTypePtr data_type) { data_type = removeNullable(data_type); if (!data_type->isValueRepresentedByNumber()) - throw Exception(ErrorCodes::ILLEGAL_STATISTICS, "Statistics of type 'uniq' does not support type {}", data_type->getName()); + throw Exception(ErrorCodes::ILLEGAL_STATISTICS, "Statistics of type 'uniq' do not support type {}", data_type->getName()); } StatisticsPtr UniqCreator(const SingleStatisticsDescription & stat, DataTypePtr data_type) diff --git a/src/Storages/Statistics/UniqStatistics.h b/src/Storages/Statistics/UniqStatistics.h index 0d86a6e458a..bf097620a86 100644 --- a/src/Storages/Statistics/UniqStatistics.h +++ b/src/Storages/Statistics/UniqStatistics.h @@ -17,7 +17,6 @@ public: UInt64 getCardinality(); void serialize(WriteBuffer & buf) override; - void deserialize(ReadBuffer & buf) override; void update(const ColumnPtr & column) override; @@ -30,7 +29,7 @@ private: }; -StatisticsPtr UniqCreator(const SingleStatisticsDescription & stat, DataTypePtr data_type); void UniqValidator(const SingleStatisticsDescription &, DataTypePtr data_type); +StatisticsPtr UniqCreator(const SingleStatisticsDescription & stat, DataTypePtr data_type); } From 337871e0ec0f8d1c6c89d5b0d39977ea689adc22 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Thu, 27 Jun 2024 18:02:58 +0000 Subject: [PATCH 129/140] Move some methods around Makes the order of methods within classes consistent. Did not touch the code itself. 
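The convention the reordering settles on: the mutation path (`update`) comes first, the `serialize`/`deserialize` pair stays adjacent, and read-only accessors come last. A compile-only sketch of the resulting shape, using stand-in types rather than ClickHouse's:

```cpp
#include <iostream>

// Illustrative only: the method order mirrors the statistics lifecycle --
// build while writing a part, persist, restore, then answer queries.
class StatisticsSketch
{
public:
    void update(double value) { ++count; sum += value; }                      // 1. ingest

    void serialize(std::ostream & out) const { out << count << ' ' << sum; }  // 2. persist
    void deserialize(std::istream & in) { in >> count >> sum; }               // 3. restore

    double rowCount() const { return count; }                                 // 4. query

private:
    double count = 0;
    double sum = 0;
};
```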
--- src/Storages/Statistics/Statistics.cpp | 5 +++- src/Storages/Statistics/Statistics.h | 4 +-- src/Storages/Statistics/TDigestStatistics.cpp | 28 +++++++++---------- src/Storages/Statistics/TDigestStatistics.h | 7 +++-- src/Storages/Statistics/UniqStatistics.cpp | 20 ++++++------- src/Storages/Statistics/UniqStatistics.h | 6 ++-- 6 files changed, 36 insertions(+), 34 deletions(-) diff --git a/src/Storages/Statistics/Statistics.cpp b/src/Storages/Statistics/Statistics.cpp index a4c57c9eef4..5666f0bbf18 100644 --- a/src/Storages/Statistics/Statistics.cpp +++ b/src/Storages/Statistics/Statistics.cpp @@ -24,7 +24,10 @@ enum StatisticsFileVersion : UInt16 V0 = 0, }; -IStatistics::IStatistics(const SingleStatisticsDescription & stat_) : stat(stat_) {} +IStatistics::IStatistics(const SingleStatisticsDescription & stat_) + : stat(stat_) +{ +} ColumnStatistics::ColumnStatistics(const ColumnStatisticsDescription & stats_desc_) : stats_desc(stats_desc_) diff --git a/src/Storages/Statistics/Statistics.h b/src/Storages/Statistics/Statistics.h index 5e756e48d42..4af7c423257 100644 --- a/src/Storages/Statistics/Statistics.h +++ b/src/Storages/Statistics/Statistics.h @@ -23,11 +23,11 @@ public: explicit IStatistics(const SingleStatisticsDescription & stat_); virtual ~IStatistics() = default; + virtual void update(const ColumnPtr & column) = 0; + virtual void serialize(WriteBuffer & buf) = 0; virtual void deserialize(ReadBuffer & buf) = 0; - virtual void update(const ColumnPtr & column) = 0; - protected: SingleStatisticsDescription stat; }; diff --git a/src/Storages/Statistics/TDigestStatistics.cpp b/src/Storages/Statistics/TDigestStatistics.cpp index 2f254b604e4..0e2cc8bac6d 100644 --- a/src/Storages/Statistics/TDigestStatistics.cpp +++ b/src/Storages/Statistics/TDigestStatistics.cpp @@ -13,14 +13,16 @@ TDigestStatistics::TDigestStatistics(const SingleStatisticsDescription & stat_) { } -Float64 TDigestStatistics::estimateLess(Float64 val) const +void TDigestStatistics::update(const ColumnPtr & column) { - return t_digest.getCountLessThan(val); -} + size_t rows = column->size(); -Float64 TDigestStatistics::estimateEqual(Float64 val) const -{ - return t_digest.getCountEqual(val); + for (size_t row = 0; row < rows; ++row) + { + /// TODO: support more types. + Float64 value = column->getFloat64(row); + t_digest.add(value, 1); + } } void TDigestStatistics::serialize(WriteBuffer & buf) @@ -33,16 +35,14 @@ void TDigestStatistics::deserialize(ReadBuffer & buf) t_digest.deserialize(buf); } -void TDigestStatistics::update(const ColumnPtr & column) +Float64 TDigestStatistics::estimateLess(Float64 val) const { - size_t rows = column->size(); + return t_digest.getCountLessThan(val); +} - for (size_t row = 0; row < rows; ++row) - { - /// TODO: support more types. 
- Float64 value = column->getFloat64(row); - t_digest.add(value, 1); - } +Float64 TDigestStatistics::estimateEqual(Float64 val) const +{ + return t_digest.getCountEqual(val); } void TDigestValidator(const SingleStatisticsDescription &, DataTypePtr data_type) diff --git a/src/Storages/Statistics/TDigestStatistics.h b/src/Storages/Statistics/TDigestStatistics.h index 2e29becc5ee..a9fbc0410f3 100644 --- a/src/Storages/Statistics/TDigestStatistics.h +++ b/src/Storages/Statistics/TDigestStatistics.h @@ -11,13 +11,14 @@ class TDigestStatistics : public IStatistics public: explicit TDigestStatistics(const SingleStatisticsDescription & stat_); - Float64 estimateLess(Float64 val) const; - Float64 estimateEqual(Float64 val) const; + void update(const ColumnPtr & column) override; void serialize(WriteBuffer & buf) override; void deserialize(ReadBuffer & buf) override; - void update(const ColumnPtr & column) override; + Float64 estimateLess(Float64 val) const; + Float64 estimateEqual(Float64 val) const; + private: QuantileTDigest t_digest; }; diff --git a/src/Storages/Statistics/UniqStatistics.cpp b/src/Storages/Statistics/UniqStatistics.cpp index 2f7a75db504..267654656cd 100644 --- a/src/Storages/Statistics/UniqStatistics.cpp +++ b/src/Storages/Statistics/UniqStatistics.cpp @@ -25,11 +25,13 @@ UniqStatistics::~UniqStatistics() collector->destroy(data); } -UInt64 UniqStatistics::getCardinality() +void UniqStatistics::update(const ColumnPtr & column) { - auto column = DataTypeUInt64().createColumn(); - collector->insertResultInto(data, *column, nullptr); - return column->getUInt(0); + /// TODO(hanfei): For low cardinality, it's very slow to convert to full column. We can read the dictionary directly. + /// Here we intend to avoid crash in CI. + auto col_ptr = column->convertToFullColumnIfLowCardinality(); + const IColumn * raw_ptr = col_ptr.get(); + collector->addBatchSinglePlace(0, column->size(), data, &(raw_ptr), nullptr); } void UniqStatistics::serialize(WriteBuffer & buf) @@ -42,13 +44,11 @@ void UniqStatistics::deserialize(ReadBuffer & buf) collector->deserialize(data, buf); } -void UniqStatistics::update(const ColumnPtr & column) +UInt64 UniqStatistics::getCardinality() { - /// TODO(hanfei): For low cardinality, it's very slow to convert to full column. We can read the dictionary directly. - /// Here we intend to avoid crash in CI. 
- auto col_ptr = column->convertToFullColumnIfLowCardinality(); - const IColumn * raw_ptr = col_ptr.get(); - collector->addBatchSinglePlace(0, column->size(), data, &(raw_ptr), nullptr); + auto column = DataTypeUInt64().createColumn(); + collector->insertResultInto(data, *column, nullptr); + return column->getUInt(0); } void UniqValidator(const SingleStatisticsDescription &, DataTypePtr data_type) diff --git a/src/Storages/Statistics/UniqStatistics.h b/src/Storages/Statistics/UniqStatistics.h index bf097620a86..4f28f80f9cb 100644 --- a/src/Storages/Statistics/UniqStatistics.h +++ b/src/Storages/Statistics/UniqStatistics.h @@ -11,18 +11,16 @@ class UniqStatistics : public IStatistics { public: UniqStatistics(const SingleStatisticsDescription & stat_, const DataTypePtr & data_type); - ~UniqStatistics() override; - UInt64 getCardinality(); + void update(const ColumnPtr & column) override; void serialize(WriteBuffer & buf) override; void deserialize(ReadBuffer & buf) override; - void update(const ColumnPtr & column) override; + UInt64 getCardinality(); private: - std::unique_ptr arena; AggregateFunctionPtr collector; AggregateDataPtr data; From 9f4e44bfc44a00dde015410d0c62e71b7cc000d1 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Wed, 3 Jul 2024 10:02:29 +0000 Subject: [PATCH 130/140] Rename XStatistics to StatisticsX Makes the naming more consistent with the rest of the codebase, e.g. - MergeTreeIndexSet - MergeTreeIndexMinMax or - StorageJoin - StorageMergeTree etc. --- src/Storages/Statistics/Statistics.cpp | 12 ++++++------ ...igestStatistics.cpp => StatisticsTDigest.cpp} | 16 ++++++++-------- .../{TDigestStatistics.h => StatisticsTDigest.h} | 4 ++-- .../{UniqStatistics.cpp => StatisticsUniq.cpp} | 16 ++++++++-------- .../{UniqStatistics.h => StatisticsUniq.h} | 6 +++--- src/Storages/Statistics/tests/gtest_stats.cpp | 2 +- 6 files changed, 28 insertions(+), 28 deletions(-) rename src/Storages/Statistics/{TDigestStatistics.cpp => StatisticsTDigest.cpp} (67%) rename src/Storages/Statistics/{TDigestStatistics.h => StatisticsTDigest.h} (84%) rename src/Storages/Statistics/{UniqStatistics.cpp => StatisticsUniq.cpp} (79%) rename src/Storages/Statistics/{UniqStatistics.h => StatisticsUniq.h} (82%) diff --git a/src/Storages/Statistics/Statistics.cpp b/src/Storages/Statistics/Statistics.cpp index 5666f0bbf18..c454adccc06 100644 --- a/src/Storages/Statistics/Statistics.cpp +++ b/src/Storages/Statistics/Statistics.cpp @@ -1,7 +1,7 @@ #include #include -#include -#include +#include +#include #include #include #include @@ -44,7 +44,7 @@ void ColumnStatistics::update(const ColumnPtr & column) Float64 ColumnStatistics::estimateLess(Float64 val) const { if (stats.contains(StatisticsType::TDigest)) - return std::static_pointer_cast(stats.at(StatisticsType::TDigest))->estimateLess(val); + return std::static_pointer_cast(stats.at(StatisticsType::TDigest))->estimateLess(val); return rows * ConditionSelectivityEstimator::default_normal_cond_factor; } @@ -57,12 +57,12 @@ Float64 ColumnStatistics::estimateEqual(Float64 val) const { if (stats.contains(StatisticsType::Uniq) && stats.contains(StatisticsType::TDigest)) { - auto uniq_static = std::static_pointer_cast(stats.at(StatisticsType::Uniq)); + auto statistics_uniq = std::static_pointer_cast(stats.at(StatisticsType::Uniq)); /// 2048 is the default number of buckets in TDigest. In this case, TDigest stores exactly one value (with many rows) /// for every bucket. 
- if (uniq_static->getCardinality() < 2048) + if (statistics_uniq->getCardinality() < 2048) { - auto tdigest_static = std::static_pointer_cast(stats.at(StatisticsType::TDigest)); + auto tdigest_static = std::static_pointer_cast(stats.at(StatisticsType::TDigest)); return tdigest_static->estimateEqual(val); } } diff --git a/src/Storages/Statistics/TDigestStatistics.cpp b/src/Storages/Statistics/StatisticsTDigest.cpp similarity index 67% rename from src/Storages/Statistics/TDigestStatistics.cpp rename to src/Storages/Statistics/StatisticsTDigest.cpp index 0e2cc8bac6d..0747197370c 100644 --- a/src/Storages/Statistics/TDigestStatistics.cpp +++ b/src/Storages/Statistics/StatisticsTDigest.cpp @@ -1,4 +1,4 @@ -#include +#include #include namespace DB @@ -8,12 +8,12 @@ namespace ErrorCodes extern const int ILLEGAL_STATISTICS; } -TDigestStatistics::TDigestStatistics(const SingleStatisticsDescription & stat_) +StatisticsTDigest::StatisticsTDigest(const SingleStatisticsDescription & stat_) : IStatistics(stat_) { } -void TDigestStatistics::update(const ColumnPtr & column) +void StatisticsTDigest::update(const ColumnPtr & column) { size_t rows = column->size(); @@ -25,22 +25,22 @@ void TDigestStatistics::update(const ColumnPtr & column) } } -void TDigestStatistics::serialize(WriteBuffer & buf) +void StatisticsTDigest::serialize(WriteBuffer & buf) { t_digest.serialize(buf); } -void TDigestStatistics::deserialize(ReadBuffer & buf) +void StatisticsTDigest::deserialize(ReadBuffer & buf) { t_digest.deserialize(buf); } -Float64 TDigestStatistics::estimateLess(Float64 val) const +Float64 StatisticsTDigest::estimateLess(Float64 val) const { return t_digest.getCountLessThan(val); } -Float64 TDigestStatistics::estimateEqual(Float64 val) const +Float64 StatisticsTDigest::estimateEqual(Float64 val) const { return t_digest.getCountEqual(val); } @@ -54,7 +54,7 @@ void TDigestValidator(const SingleStatisticsDescription &, DataTypePtr data_type StatisticsPtr TDigestCreator(const SingleStatisticsDescription & stat, DataTypePtr) { - return std::make_shared(stat); + return std::make_shared(stat); } } diff --git a/src/Storages/Statistics/TDigestStatistics.h b/src/Storages/Statistics/StatisticsTDigest.h similarity index 84% rename from src/Storages/Statistics/TDigestStatistics.h rename to src/Storages/Statistics/StatisticsTDigest.h index a9fbc0410f3..f391d0b17e6 100644 --- a/src/Storages/Statistics/TDigestStatistics.h +++ b/src/Storages/Statistics/StatisticsTDigest.h @@ -6,10 +6,10 @@ namespace DB { -class TDigestStatistics : public IStatistics +class StatisticsTDigest : public IStatistics { public: - explicit TDigestStatistics(const SingleStatisticsDescription & stat_); + explicit StatisticsTDigest(const SingleStatisticsDescription & stat_); void update(const ColumnPtr & column) override; diff --git a/src/Storages/Statistics/UniqStatistics.cpp b/src/Storages/Statistics/StatisticsUniq.cpp similarity index 79% rename from src/Storages/Statistics/UniqStatistics.cpp rename to src/Storages/Statistics/StatisticsUniq.cpp index 267654656cd..4e24e5f0e96 100644 --- a/src/Storages/Statistics/UniqStatistics.cpp +++ b/src/Storages/Statistics/StatisticsUniq.cpp @@ -1,4 +1,4 @@ -#include +#include #include #include @@ -10,7 +10,7 @@ namespace ErrorCodes extern const int ILLEGAL_STATISTICS; } -UniqStatistics::UniqStatistics(const SingleStatisticsDescription & stat_, const DataTypePtr & data_type) +StatisticsUniq::StatisticsUniq(const SingleStatisticsDescription & stat_, const DataTypePtr & data_type) : IStatistics(stat_) { arena = 
std::make_unique(); @@ -20,12 +20,12 @@ UniqStatistics::UniqStatistics(const SingleStatisticsDescription & stat_, const collector->create(data); } -UniqStatistics::~UniqStatistics() +StatisticsUniq::~StatisticsUniq() { collector->destroy(data); } -void UniqStatistics::update(const ColumnPtr & column) +void StatisticsUniq::update(const ColumnPtr & column) { /// TODO(hanfei): For low cardinality, it's very slow to convert to full column. We can read the dictionary directly. /// Here we intend to avoid crash in CI. @@ -34,17 +34,17 @@ void UniqStatistics::update(const ColumnPtr & column) collector->addBatchSinglePlace(0, column->size(), data, &(raw_ptr), nullptr); } -void UniqStatistics::serialize(WriteBuffer & buf) +void StatisticsUniq::serialize(WriteBuffer & buf) { collector->serialize(data, buf); } -void UniqStatistics::deserialize(ReadBuffer & buf) +void StatisticsUniq::deserialize(ReadBuffer & buf) { collector->deserialize(data, buf); } -UInt64 UniqStatistics::getCardinality() +UInt64 StatisticsUniq::getCardinality() { auto column = DataTypeUInt64().createColumn(); collector->insertResultInto(data, *column, nullptr); @@ -60,7 +60,7 @@ void UniqValidator(const SingleStatisticsDescription &, DataTypePtr data_type) StatisticsPtr UniqCreator(const SingleStatisticsDescription & stat, DataTypePtr data_type) { - return std::make_shared(stat, data_type); + return std::make_shared(stat, data_type); } } diff --git a/src/Storages/Statistics/UniqStatistics.h b/src/Storages/Statistics/StatisticsUniq.h similarity index 82% rename from src/Storages/Statistics/UniqStatistics.h rename to src/Storages/Statistics/StatisticsUniq.h index 4f28f80f9cb..1c521fa9984 100644 --- a/src/Storages/Statistics/UniqStatistics.h +++ b/src/Storages/Statistics/StatisticsUniq.h @@ -7,11 +7,11 @@ namespace DB { -class UniqStatistics : public IStatistics +class StatisticsUniq : public IStatistics { public: - UniqStatistics(const SingleStatisticsDescription & stat_, const DataTypePtr & data_type); - ~UniqStatistics() override; + StatisticsUniq(const SingleStatisticsDescription & stat_, const DataTypePtr & data_type); + ~StatisticsUniq() override; void update(const ColumnPtr & column) override; diff --git a/src/Storages/Statistics/tests/gtest_stats.cpp b/src/Storages/Statistics/tests/gtest_stats.cpp index f94f310be56..c3c14632ba1 100644 --- a/src/Storages/Statistics/tests/gtest_stats.cpp +++ b/src/Storages/Statistics/tests/gtest_stats.cpp @@ -1,6 +1,6 @@ #include -#include +#include TEST(Statistics, TDigestLessThan) { From 2cefa56f9b640f14b020660ec0296fe7bb6669a9 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 2 Jul 2024 19:59:52 +0000 Subject: [PATCH 131/140] Update docs --- docs/en/engines/table-engines/mergetree-family/mergetree.md | 6 +++--- docs/en/sql-reference/statements/alter/statistics.md | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/en/engines/table-engines/mergetree-family/mergetree.md b/docs/en/engines/table-engines/mergetree-family/mergetree.md index f0c4e1b0e34..3826e4e9c94 100644 --- a/docs/en/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md @@ -993,11 +993,11 @@ They can be used for prewhere optimization only if we enable `set allow_statisti - `TDigest` - Stores distribution of values from numeric columns in [TDigest](https://github.com/tdunning/t-digest) sketch. + [TDigest](https://github.com/tdunning/t-digest) sketches which allow to compute approximate percentiles (e.g. 
the 90th percentile) for numeric columns. - `Uniq` - - Estimate the number of distinct values of a column by HyperLogLog. + + [HyperLogLog](https://en.wikipedia.org/wiki/HyperLogLog) sketches which provide an estimation how many distinct values a column contains. ## Column-level Settings {#column-level-settings} diff --git a/docs/en/sql-reference/statements/alter/statistics.md b/docs/en/sql-reference/statements/alter/statistics.md index 80024781f88..6880cef0e5c 100644 --- a/docs/en/sql-reference/statements/alter/statistics.md +++ b/docs/en/sql-reference/statements/alter/statistics.md @@ -28,6 +28,6 @@ There is an example adding two statistics types to two columns: ALTER TABLE t1 MODIFY STATISTICS c, d TYPE TDigest, Uniq; ``` -:::note +:::note Statistic manipulation is supported only for tables with [`*MergeTree`](../../../engines/table-engines/mergetree-family/mergetree.md) engine (including [replicated](../../../engines/table-engines/mergetree-family/replication.md) variants). ::: From bd7e613b3b21589710e11b043577959ce340e2c5 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Sun, 30 Jun 2024 10:27:40 +0000 Subject: [PATCH 132/140] Minor cleanup of 02864_statistics_exception --- .../02864_statistics_exception.sql | 78 +++++++++---------- 1 file changed, 38 insertions(+), 40 deletions(-) diff --git a/tests/queries/0_stateless/02864_statistics_exception.sql b/tests/queries/0_stateless/02864_statistics_exception.sql index c531d39cd69..289ffee6600 100644 --- a/tests/queries/0_stateless/02864_statistics_exception.sql +++ b/tests/queries/0_stateless/02864_statistics_exception.sql @@ -1,57 +1,55 @@ -DROP TABLE IF EXISTS t1; +-- Tests creating/dropping/materializing statistics produces the right exceptions. -CREATE TABLE t1 +DROP TABLE IF EXISTS tab; + +-- Can't create statistics when allow_experimental_statistics = 0 +CREATE TABLE tab ( - a Float64 STATISTICS(tdigest), - b Int64 STATISTICS(tdigest), - pk String, -) Engine = MergeTree() ORDER BY pk; -- { serverError INCORRECT_QUERY } + a Float64 STATISTICS(tdigest) +) Engine = MergeTree() ORDER BY tuple(); -- { serverError INCORRECT_QUERY } SET allow_experimental_statistics = 1; -CREATE TABLE t1 +-- The same type of statistics can't exist more than once on a column +CREATE TABLE tab ( - a Float64 STATISTICS(tdigest), - b Int64, - pk String STATISTICS(tdigest), -) Engine = MergeTree() ORDER BY pk; -- { serverError ILLEGAL_STATISTICS } + a Float64 STATISTICS(tdigest, tdigest) +) Engine = MergeTree() ORDER BY tuple(); -- { serverError INCORRECT_QUERY } -CREATE TABLE t1 +-- Unknown statistics types are rejected +CREATE TABLE tab ( - a Float64 STATISTICS(tdigest, tdigest(10)), - b Int64, -) Engine = MergeTree() ORDER BY pk; -- { serverError INCORRECT_QUERY } + a Float64 STATISTICS(no_statistics_type) +) Engine = MergeTree() ORDER BY tuple(); -- { serverError INCORRECT_QUERY } -CREATE TABLE t1 +-- tDigest statistics can only be created on numeric columns +CREATE TABLE tab ( - a Float64 STATISTICS(xyz), - b Int64, -) Engine = MergeTree() ORDER BY pk; -- { serverError INCORRECT_QUERY } + a String STATISTICS(tdigest), +) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS } -CREATE TABLE t1 +CREATE TABLE tab ( a Float64, - b Int64, - pk String, -) Engine = MergeTree() ORDER BY pk; + b String +) Engine = MergeTree() ORDER BY tuple(); -ALTER TABLE t1 ADD STATISTICS a TYPE xyz; -- { serverError INCORRECT_QUERY } -ALTER TABLE t1 ADD STATISTICS a TYPE tdigest; -ALTER TABLE t1 ADD STATISTICS IF NOT EXISTS a TYPE tdigest; -ALTER TABLE t1 
ADD STATISTICS a TYPE tdigest; -- { serverError ILLEGAL_STATISTICS } +ALTER TABLE tab ADD STATISTICS a TYPE no_statistics_type; -- { serverError INCORRECT_QUERY } +ALTER TABLE tab ADD STATISTICS a TYPE tdigest; +ALTER TABLE tab ADD STATISTICS IF NOT EXISTS a TYPE tdigest; +ALTER TABLE tab ADD STATISTICS a TYPE tdigest; -- { serverError ILLEGAL_STATISTICS } +ALTER TABLE tab MODIFY STATISTICS a TYPE tdigest; -- Statistics can be created only on integer columns -ALTER TABLE t1 MODIFY STATISTICS a TYPE tdigest; -ALTER TABLE t1 ADD STATISTICS pk TYPE tdigest; -- { serverError ILLEGAL_STATISTICS } -ALTER TABLE t1 DROP STATISTICS b; -- { serverError ILLEGAL_STATISTICS } -ALTER TABLE t1 DROP STATISTICS a; -ALTER TABLE t1 DROP STATISTICS IF EXISTS a; -ALTER TABLE t1 CLEAR STATISTICS a; -- { serverError ILLEGAL_STATISTICS } -ALTER TABLE t1 CLEAR STATISTICS IF EXISTS a; -ALTER TABLE t1 MATERIALIZE STATISTICS b; -- { serverError ILLEGAL_STATISTICS } +ALTER TABLE tab ADD STATISTICS b TYPE tdigest; -- { serverError ILLEGAL_STATISTICS } +ALTER TABLE tab DROP STATISTICS b; -- { serverError ILLEGAL_STATISTICS } +ALTER TABLE tab DROP STATISTICS a; +ALTER TABLE tab DROP STATISTICS IF EXISTS a; +ALTER TABLE tab CLEAR STATISTICS a; -- { serverError ILLEGAL_STATISTICS } +ALTER TABLE tab CLEAR STATISTICS IF EXISTS a; +ALTER TABLE tab MATERIALIZE STATISTICS b; -- { serverError ILLEGAL_STATISTICS } -ALTER TABLE t1 ADD STATISTICS a TYPE tdigest; -ALTER TABLE t1 ADD STATISTICS b TYPE tdigest; -ALTER TABLE t1 MODIFY COLUMN a Float64 TTL toDateTime(b) + INTERVAL 1 MONTH; -ALTER TABLE t1 MODIFY COLUMN a Int64; -- { serverError ALTER_OF_COLUMN_IS_FORBIDDEN } +ALTER TABLE tab ADD STATISTICS a TYPE tdigest; +ALTER TABLE tab MODIFY COLUMN a Float64 TTL toDateTime(b) + INTERVAL 1 MONTH; +ALTER TABLE tab MODIFY COLUMN a Int64; -- { serverError ALTER_OF_COLUMN_IS_FORBIDDEN } -DROP TABLE t1; +DROP TABLE tab; From c390ecdb4df7e090ea94f10adda47dd73864c71c Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Sun, 30 Jun 2024 10:11:04 +0000 Subject: [PATCH 133/140] Rename 02864_statistics_operate --> 02864_statistics_ddl --- .../02864_statistics_ddl.reference | 31 ++++++++++ .../0_stateless/02864_statistics_ddl.sql | 59 +++++++++++++++++++ .../02864_statistics_operate.reference | 31 ---------- .../0_stateless/02864_statistics_operate.sql | 57 ------------------ 4 files changed, 90 insertions(+), 88 deletions(-) create mode 100644 tests/queries/0_stateless/02864_statistics_ddl.reference create mode 100644 tests/queries/0_stateless/02864_statistics_ddl.sql delete mode 100644 tests/queries/0_stateless/02864_statistics_operate.reference delete mode 100644 tests/queries/0_stateless/02864_statistics_operate.sql diff --git a/tests/queries/0_stateless/02864_statistics_ddl.reference b/tests/queries/0_stateless/02864_statistics_ddl.reference new file mode 100644 index 00000000000..a7ff5caa0b0 --- /dev/null +++ b/tests/queries/0_stateless/02864_statistics_ddl.reference @@ -0,0 +1,31 @@ +CREATE TABLE default.tab\n(\n `a` Float64 STATISTICS(tdigest),\n `b` Int64 STATISTICS(tdigest),\n `pk` String\n)\nENGINE = MergeTree\nORDER BY pk\nSETTINGS min_bytes_for_wide_part = 0, index_granularity = 8192 +After insert + Prewhere info + Prewhere filter + Prewhere filter column: and(less(a, 10), less(b, 10)) (removed) +10 +0 +After drop statistic + Prewhere info + Prewhere filter + Prewhere filter column: and(less(b, 10), less(a, 10)) (removed) +10 +CREATE TABLE default.tab\n(\n `a` Float64,\n `b` Int64,\n `pk` String\n)\nENGINE = MergeTree\nORDER BY 
pk\nSETTINGS min_bytes_for_wide_part = 0, index_granularity = 8192 +After add statistic +CREATE TABLE default.tab\n(\n `a` Float64 STATISTICS(tdigest),\n `b` Int64 STATISTICS(tdigest),\n `pk` String\n)\nENGINE = MergeTree\nORDER BY pk\nSETTINGS min_bytes_for_wide_part = 0, index_granularity = 8192 +After materialize statistic + Prewhere info + Prewhere filter + Prewhere filter column: and(less(a, 10), less(b, 10)) (removed) +20 +After merge + Prewhere info + Prewhere filter + Prewhere filter column: and(less(a, 10), less(b, 10)) (removed) +20 +CREATE TABLE default.tab\n(\n `a` Float64 STATISTICS(tdigest),\n `c` Int64 STATISTICS(tdigest),\n `pk` String\n)\nENGINE = MergeTree\nORDER BY pk\nSETTINGS min_bytes_for_wide_part = 0, index_granularity = 8192 +After rename + Prewhere info + Prewhere filter + Prewhere filter column: and(less(a, 10), less(c, 10)) (removed) +20 diff --git a/tests/queries/0_stateless/02864_statistics_ddl.sql b/tests/queries/0_stateless/02864_statistics_ddl.sql new file mode 100644 index 00000000000..fe612efe2ac --- /dev/null +++ b/tests/queries/0_stateless/02864_statistics_ddl.sql @@ -0,0 +1,59 @@ +-- Tests that various DDL statements create/drop/materialize statistics + +DROP TABLE IF EXISTS tab; + +SET allow_experimental_statistics = 1; +SET allow_statistics_optimize = 1; + +CREATE TABLE tab +( + a Float64 STATISTICS(tdigest), + b Int64 STATISTICS(tdigest), + pk String, +) Engine = MergeTree() ORDER BY pk +SETTINGS min_bytes_for_wide_part = 0; + +SHOW CREATE TABLE tab; + +INSERT INTO tab select number, -number, generateUUIDv4() FROM system.numbers LIMIT 10000; + +SELECT 'After insert'; +SELECT replaceRegexpAll(explain, '__table1\.|_UInt8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE b < 10 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; +SELECT count(*) FROM tab WHERE b < 10 and a < 10; +SELECT count(*) FROM tab WHERE b < NULL and a < '10'; + +ALTER TABLE tab DROP STATISTICS a, b; + +SELECT 'After drop statistic'; +SELECT replaceRegexpAll(explain, '__table1\.|_UInt8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE b < 10 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; +SELECT count(*) FROM tab WHERE b < 10 and a < 10; + +SHOW CREATE TABLE tab; + +ALTER TABLE tab ADD STATISTICS a, b TYPE tdigest; + +SELECT 'After add statistic'; + +SHOW CREATE TABLE tab; + +ALTER TABLE tab MATERIALIZE STATISTICS a, b; +INSERT INTO tab select number, -number, generateUUIDv4() FROM system.numbers LIMIT 10000; + +SELECT 'After materialize statistic'; +SELECT replaceRegexpAll(explain, '__table1\.|_UInt8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE b < 10 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; +SELECT count(*) FROM tab WHERE b < 10 and a < 10; + +OPTIMIZE TABLE tab FINAL; + +SELECT 'After merge'; +SELECT replaceRegexpAll(explain, '__table1\.|_UInt8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE b < 10 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; +SELECT count(*) FROM tab WHERE b < 10 and a < 10; + +ALTER TABLE tab RENAME COLUMN b TO c; +SHOW CREATE TABLE tab; + +SELECT 'After rename'; +SELECT replaceRegexpAll(explain, '__table1\.|_UInt8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE c < 10 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; +SELECT count(*) FROM tab WHERE c < 10 and a < 10; + +DROP TABLE IF EXISTS tab; diff --git 
a/tests/queries/0_stateless/02864_statistics_operate.reference b/tests/queries/0_stateless/02864_statistics_operate.reference deleted file mode 100644 index 6398a9bd000..00000000000 --- a/tests/queries/0_stateless/02864_statistics_operate.reference +++ /dev/null @@ -1,31 +0,0 @@ -CREATE TABLE default.t1\n(\n `a` Float64 STATISTICS(tdigest),\n `b` Int64 STATISTICS(tdigest),\n `pk` String\n)\nENGINE = MergeTree\nORDER BY pk\nSETTINGS min_bytes_for_wide_part = 0, index_granularity = 8192 -After insert - Prewhere info - Prewhere filter - Prewhere filter column: and(less(a, 10), less(b, 10)) (removed) -10 -0 -After drop statistic - Prewhere info - Prewhere filter - Prewhere filter column: and(less(b, 10), less(a, 10)) (removed) -10 -CREATE TABLE default.t1\n(\n `a` Float64,\n `b` Int64,\n `pk` String\n)\nENGINE = MergeTree\nORDER BY pk\nSETTINGS min_bytes_for_wide_part = 0, index_granularity = 8192 -After add statistic -CREATE TABLE default.t1\n(\n `a` Float64 STATISTICS(tdigest),\n `b` Int64 STATISTICS(tdigest),\n `pk` String\n)\nENGINE = MergeTree\nORDER BY pk\nSETTINGS min_bytes_for_wide_part = 0, index_granularity = 8192 -After materialize statistic - Prewhere info - Prewhere filter - Prewhere filter column: and(less(a, 10), less(b, 10)) (removed) -20 -After merge - Prewhere info - Prewhere filter - Prewhere filter column: and(less(a, 10), less(b, 10)) (removed) -20 -CREATE TABLE default.t1\n(\n `a` Float64 STATISTICS(tdigest),\n `c` Int64 STATISTICS(tdigest),\n `pk` String\n)\nENGINE = MergeTree\nORDER BY pk\nSETTINGS min_bytes_for_wide_part = 0, index_granularity = 8192 -After rename - Prewhere info - Prewhere filter - Prewhere filter column: and(less(a, 10), less(c, 10)) (removed) -20 diff --git a/tests/queries/0_stateless/02864_statistics_operate.sql b/tests/queries/0_stateless/02864_statistics_operate.sql deleted file mode 100644 index bf69c11bc91..00000000000 --- a/tests/queries/0_stateless/02864_statistics_operate.sql +++ /dev/null @@ -1,57 +0,0 @@ -DROP TABLE IF EXISTS t1; - -SET allow_experimental_statistics = 1; -SET allow_statistics_optimize = 1; - -CREATE TABLE t1 -( - a Float64 STATISTICS(tdigest), - b Int64 STATISTICS(tdigest), - pk String, -) Engine = MergeTree() ORDER BY pk -SETTINGS min_bytes_for_wide_part = 0; - -SHOW CREATE TABLE t1; - -INSERT INTO t1 select number, -number, generateUUIDv4() FROM system.numbers LIMIT 10000; - -SELECT 'After insert'; -SELECT replaceRegexpAll(explain, '__table1\.|_UInt8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM t1 WHERE b < 10 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; -SELECT count(*) FROM t1 WHERE b < 10 and a < 10; -SELECT count(*) FROM t1 WHERE b < NULL and a < '10'; - -ALTER TABLE t1 DROP STATISTICS a, b; - -SELECT 'After drop statistic'; -SELECT replaceRegexpAll(explain, '__table1\.|_UInt8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM t1 WHERE b < 10 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; -SELECT count(*) FROM t1 WHERE b < 10 and a < 10; - -SHOW CREATE TABLE t1; - -ALTER TABLE t1 ADD STATISTICS a, b TYPE tdigest; - -SELECT 'After add statistic'; - -SHOW CREATE TABLE t1; - -ALTER TABLE t1 MATERIALIZE STATISTICS a, b; -INSERT INTO t1 select number, -number, generateUUIDv4() FROM system.numbers LIMIT 10000; - -SELECT 'After materialize statistic'; -SELECT replaceRegexpAll(explain, '__table1\.|_UInt8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM t1 WHERE b < 10 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; 
-SELECT count(*) FROM t1 WHERE b < 10 and a < 10; - -OPTIMIZE TABLE t1 FINAL; - -SELECT 'After merge'; -SELECT replaceRegexpAll(explain, '__table1\.|_UInt8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM t1 WHERE b < 10 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; -SELECT count(*) FROM t1 WHERE b < 10 and a < 10; - -ALTER TABLE t1 RENAME COLUMN b TO c; -SHOW CREATE TABLE t1; - -SELECT 'After rename'; -SELECT replaceRegexpAll(explain, '__table1\.|_UInt8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM t1 WHERE c < 10 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; -SELECT count(*) FROM t1 WHERE c < 10 and a < 10; - -DROP TABLE IF EXISTS t1; From 4f0916caa5ee07c3c612641782288ee42adfd92a Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Sun, 30 Jun 2024 09:56:38 +0000 Subject: [PATCH 134/140] Rename test 03164_materialize_statistics --> 02864_statistics_materialize_in_merge Consistency with existing statistics tests - 02864_statistics_operate - 02864_statistics_exception - 02864_statistics_uniq --- ...statistics_materialize_in_merge.reference} | 0 .../02864_statistics_materialize_in_merge.sql | 52 +++++++++++++++++++ .../03164_materialize_statistics.sql | 49 ----------------- 3 files changed, 52 insertions(+), 49 deletions(-) rename tests/queries/0_stateless/{03164_materialize_statistics.reference => 02864_statistics_materialize_in_merge.reference} (100%) create mode 100644 tests/queries/0_stateless/02864_statistics_materialize_in_merge.sql delete mode 100644 tests/queries/0_stateless/03164_materialize_statistics.sql diff --git a/tests/queries/0_stateless/03164_materialize_statistics.reference b/tests/queries/0_stateless/02864_statistics_materialize_in_merge.reference similarity index 100% rename from tests/queries/0_stateless/03164_materialize_statistics.reference rename to tests/queries/0_stateless/02864_statistics_materialize_in_merge.reference diff --git a/tests/queries/0_stateless/02864_statistics_materialize_in_merge.sql b/tests/queries/0_stateless/02864_statistics_materialize_in_merge.sql new file mode 100644 index 00000000000..3e15ec1148e --- /dev/null +++ b/tests/queries/0_stateless/02864_statistics_materialize_in_merge.sql @@ -0,0 +1,52 @@ +-- Tests delayed materialization of statistics in merge instead of during insert (setting 'materialize_statistics_on_insert = 0'). + +DROP TABLE IF EXISTS tab; + +SET allow_experimental_analyzer = 1; +SET allow_experimental_statistics = 1; +SET allow_statistics_optimize = 1; + +SET materialize_statistics_on_insert = 0; + +CREATE TABLE tab +( + a Int64 STATISTICS(tdigest), + b Int16 STATISTICS(tdigest), +) ENGINE = MergeTree() ORDER BY tuple() +SETTINGS min_bytes_for_wide_part = 0, enable_vertical_merge_algorithm = 0; -- TODO: there is a bug in vertical merge with statistics. 
+ +INSERT INTO tab SELECT number, -number FROM system.numbers LIMIT 10000; + +SELECT count(*) FROM tab WHERE b < 10 and a < 10 SETTINGS log_comment = 'statistics not used'; + +OPTIMIZE TABLE tab FINAL; + +SELECT count(*) FROM tab WHERE b < 10 and a < 10 SETTINGS log_comment = 'statistics used after merge'; + +TRUNCATE TABLE tab; +SET mutations_sync = 2; + +INSERT INTO tab SELECT number, -number FROM system.numbers LIMIT 10000; +ALTER TABLE tab MATERIALIZE STATISTICS a, b; + +SELECT count(*) FROM tab WHERE b < 10 and a < 10 SETTINGS log_comment = 'statistics used after materialize'; + +DROP TABLE tab; + +SYSTEM FLUSH LOGS; + +SELECT log_comment, message FROM system.text_log JOIN +( + SELECT Settings['log_comment'] AS log_comment, query_id FROM system.query_log + WHERE current_database = currentDatabase() + AND query LIKE 'SELECT count(*) FROM tab%' + AND type = 'QueryFinish' +) AS query_log USING (query_id) +WHERE message LIKE '%moved to PREWHERE%' +ORDER BY event_time_microseconds; + +SELECT count(), sum(ProfileEvents['MergeTreeDataWriterStatisticsCalculationMicroseconds']) +FROM system.query_log +WHERE current_database = currentDatabase() + AND query LIKE 'INSERT INTO tab SELECT%' + AND type = 'QueryFinish'; diff --git a/tests/queries/0_stateless/03164_materialize_statistics.sql b/tests/queries/0_stateless/03164_materialize_statistics.sql deleted file mode 100644 index 43c5724dd59..00000000000 --- a/tests/queries/0_stateless/03164_materialize_statistics.sql +++ /dev/null @@ -1,49 +0,0 @@ -DROP TABLE IF EXISTS t_statistics_materialize; - -SET allow_experimental_analyzer = 1; -SET allow_experimental_statistics = 1; -SET allow_statistics_optimize = 1; -SET materialize_statistics_on_insert = 0; - -CREATE TABLE t_statistics_materialize -( - a Int64 STATISTICS(tdigest), - b Int16 STATISTICS(tdigest), -) ENGINE = MergeTree() ORDER BY tuple() -SETTINGS min_bytes_for_wide_part = 0, enable_vertical_merge_algorithm = 0; -- TODO: there is a bug in vertical merge with statistics. 
- -INSERT INTO t_statistics_materialize SELECT number, -number FROM system.numbers LIMIT 10000; - -SELECT count(*) FROM t_statistics_materialize WHERE b < 10 and a < 10 SETTINGS log_comment = 'statistics not used'; - -OPTIMIZE TABLE t_statistics_materialize FINAL; - -SELECT count(*) FROM t_statistics_materialize WHERE b < 10 and a < 10 SETTINGS log_comment = 'statistics used after merge'; - -TRUNCATE TABLE t_statistics_materialize; -SET mutations_sync = 2; - -INSERT INTO t_statistics_materialize SELECT number, -number FROM system.numbers LIMIT 10000; -ALTER TABLE t_statistics_materialize MATERIALIZE STATISTICS a, b; - -SELECT count(*) FROM t_statistics_materialize WHERE b < 10 and a < 10 SETTINGS log_comment = 'statistics used after materialize'; - -DROP TABLE t_statistics_materialize; - -SYSTEM FLUSH LOGS; - -SELECT log_comment, message FROM system.text_log JOIN -( - SELECT Settings['log_comment'] AS log_comment, query_id FROM system.query_log - WHERE current_database = currentDatabase() - AND query LIKE 'SELECT count(*) FROM t_statistics_materialize%' - AND type = 'QueryFinish' -) AS query_log USING (query_id) -WHERE message LIKE '%moved to PREWHERE%' -ORDER BY event_time_microseconds; - -SELECT count(), sum(ProfileEvents['MergeTreeDataWriterStatisticsCalculationMicroseconds']) -FROM system.query_log -WHERE current_database = currentDatabase() - AND query LIKE 'INSERT INTO t_statistics_materialize SELECT%' - AND type = 'QueryFinish'; From 6ccb26b1aa6c1d79e0388a72afd4b0f9edb2ea1c Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Wed, 3 Jul 2024 10:26:19 +0000 Subject: [PATCH 135/140] Switch to a virtual interface to get rid of static_pointer_cast --- src/Storages/Statistics/Statistics.cpp | 42 ++++++++++++++++----- src/Storages/Statistics/Statistics.h | 9 +++++ src/Storages/Statistics/StatisticsTDigest.h | 4 +- src/Storages/Statistics/StatisticsUniq.cpp | 2 +- src/Storages/Statistics/StatisticsUniq.h | 2 +- 5 files changed, 45 insertions(+), 14 deletions(-) diff --git a/src/Storages/Statistics/Statistics.cpp b/src/Storages/Statistics/Statistics.cpp index c454adccc06..28e75c6d244 100644 --- a/src/Storages/Statistics/Statistics.cpp +++ b/src/Storages/Statistics/Statistics.cpp @@ -41,10 +41,35 @@ void ColumnStatistics::update(const ColumnPtr & column) stat.second->update(column); } +UInt64 IStatistics::estimateCardinality() const +{ + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cardinality estimation is not implemented for this type of statistics"); +} + +Float64 IStatistics::estimateEqual(Float64 /*val*/) const +{ + throw Exception(ErrorCodes::LOGICAL_ERROR, "Equality estimation is not implemented for this type of statistics"); +} + +Float64 IStatistics::estimateLess(Float64 /*val*/) const +{ + throw Exception(ErrorCodes::LOGICAL_ERROR, "Less-than estimation is not implemented for this type of statistics"); +} + +/// ------------------------------------- +/// Implementation of the estimation: +/// Note: Each statistics object supports certain types predicates natively, e.g. +/// - TDigest: '< X' (less-than predicates) +/// - Count-min sketches: '= X' (equal predicates) +/// - Uniq (HyperLogLog): 'count distinct(*)' (column cardinality) +/// If multiple statistics objects are available per column, it is sometimes also possible to combine them in a clever way. +/// For that reason, all estimation are performed in a central place (here), and we don't simply pass the predicate to the first statistics +/// object that supports it natively. 
+ Float64 ColumnStatistics::estimateLess(Float64 val) const { if (stats.contains(StatisticsType::TDigest)) - return std::static_pointer_cast(stats.at(StatisticsType::TDigest))->estimateLess(val); + return stats.at(StatisticsType::TDigest)->estimateLess(val); return rows * ConditionSelectivityEstimator::default_normal_cond_factor; } @@ -57,14 +82,9 @@ Float64 ColumnStatistics::estimateEqual(Float64 val) const { if (stats.contains(StatisticsType::Uniq) && stats.contains(StatisticsType::TDigest)) { - auto statistics_uniq = std::static_pointer_cast(stats.at(StatisticsType::Uniq)); - /// 2048 is the default number of buckets in TDigest. In this case, TDigest stores exactly one value (with many rows) - /// for every bucket. - if (statistics_uniq->getCardinality() < 2048) - { - auto tdigest_static = std::static_pointer_cast(stats.at(StatisticsType::TDigest)); - return tdigest_static->estimateEqual(val); - } + /// 2048 is the default number of buckets in TDigest. In this case, TDigest stores exactly one value (with many rows) for every bucket. + if (stats.at(StatisticsType::Uniq)->estimateCardinality() < 2048) + return stats.at(StatisticsType::TDigest)->estimateEqual(val); } if (val < - ConditionSelectivityEstimator::threshold || val > ConditionSelectivityEstimator::threshold) return rows * ConditionSelectivityEstimator::default_normal_cond_factor; @@ -72,6 +92,8 @@ Float64 ColumnStatistics::estimateEqual(Float64 val) const return rows * ConditionSelectivityEstimator::default_good_cond_factor; } +/// ------------------------------------- + void ColumnStatistics::serialize(WriteBuffer & buf) { writeIntBinary(V0, buf); @@ -81,7 +103,7 @@ void ColumnStatistics::serialize(WriteBuffer & buf) stat_types_mask |= 1 << UInt8(type); writeIntBinary(stat_types_mask, buf); - /// store the column row count as it is always useful + /// as the column row count is always useful, save it in any case writeIntBinary(rows, buf); /// write the actual statistics object diff --git a/src/Storages/Statistics/Statistics.h b/src/Storages/Statistics/Statistics.h index 4af7c423257..d4364075d1c 100644 --- a/src/Storages/Statistics/Statistics.h +++ b/src/Storages/Statistics/Statistics.h @@ -28,6 +28,15 @@ public: virtual void serialize(WriteBuffer & buf) = 0; virtual void deserialize(ReadBuffer & buf) = 0; + /// Estimate the cardinality of the column. + /// Throws if the statistics object is not able to do a meaningful estimation. + virtual UInt64 estimateCardinality() const; + + /// Per-value estimations. + /// Throws if the statistics object is not able to do a meaningful estimation. 
+ virtual Float64 estimateEqual(Float64 val) const; /// cardinality of val in the column + virtual Float64 estimateLess(Float64 val) const; /// summarized cardinality of values < val in the column + protected: SingleStatisticsDescription stat; }; diff --git a/src/Storages/Statistics/StatisticsTDigest.h b/src/Storages/Statistics/StatisticsTDigest.h index f391d0b17e6..d3a3bf115ee 100644 --- a/src/Storages/Statistics/StatisticsTDigest.h +++ b/src/Storages/Statistics/StatisticsTDigest.h @@ -16,8 +16,8 @@ public: void serialize(WriteBuffer & buf) override; void deserialize(ReadBuffer & buf) override; - Float64 estimateLess(Float64 val) const; - Float64 estimateEqual(Float64 val) const; + Float64 estimateLess(Float64 val) const override; + Float64 estimateEqual(Float64 val) const override; private: QuantileTDigest t_digest; diff --git a/src/Storages/Statistics/StatisticsUniq.cpp b/src/Storages/Statistics/StatisticsUniq.cpp index 4e24e5f0e96..bf9a40ea8cb 100644 --- a/src/Storages/Statistics/StatisticsUniq.cpp +++ b/src/Storages/Statistics/StatisticsUniq.cpp @@ -44,7 +44,7 @@ void StatisticsUniq::deserialize(ReadBuffer & buf) collector->deserialize(data, buf); } -UInt64 StatisticsUniq::getCardinality() +UInt64 StatisticsUniq::estimateCardinality() const { auto column = DataTypeUInt64().createColumn(); collector->insertResultInto(data, *column, nullptr); diff --git a/src/Storages/Statistics/StatisticsUniq.h b/src/Storages/Statistics/StatisticsUniq.h index 1c521fa9984..5290585bd94 100644 --- a/src/Storages/Statistics/StatisticsUniq.h +++ b/src/Storages/Statistics/StatisticsUniq.h @@ -18,7 +18,7 @@ public: void serialize(WriteBuffer & buf) override; void deserialize(ReadBuffer & buf) override; - UInt64 getCardinality(); + UInt64 estimateCardinality() const override; private: std::unique_ptr arena; From 6d3d33638ac45b8d7e6fd2d788335a40539548e8 Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky <43110995+evillique@users.noreply.github.com> Date: Wed, 3 Jul 2024 14:22:33 +0200 Subject: [PATCH 136/140] Fix lock-order-inversion in DatabaseCatalog --- src/Interpreters/DatabaseCatalog.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/Interpreters/DatabaseCatalog.cpp b/src/Interpreters/DatabaseCatalog.cpp index aaec94a4fb0..841decf29c5 100644 --- a/src/Interpreters/DatabaseCatalog.cpp +++ b/src/Interpreters/DatabaseCatalog.cpp @@ -274,10 +274,12 @@ void DatabaseCatalog::shutdownImpl() database->shutdown(); } + TablesMarkedAsDropped tables_marked_dropped_to_destroy; { std::lock_guard lock(tables_marked_dropped_mutex); - tables_marked_dropped.clear(); + tables_marked_dropped.swap(tables_marked_dropped_to_destroy); } + tables_marked_dropped_to_destroy.clear(); std::lock_guard lock(databases_mutex); for (const auto & db : databases) From 2a1c13b070fbb3ad38ad8820f004b8687dab9425 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Wed, 3 Jul 2024 14:25:31 +0200 Subject: [PATCH 137/140] Add comment about the changes --- base/base/itoa.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/base/base/itoa.cpp b/base/base/itoa.cpp index c17a2bfd999..60231507c96 100644 --- a/base/base/itoa.cpp +++ b/base/base/itoa.cpp @@ -119,6 +119,13 @@ inline ALWAYS_INLINE char * to_text_from_integer(char * b, T i) if (n < U(1e2)) { + /// This is changed from the original jeaiii implementation + /// For small numbers the extra branch to call outOneDigit() is worth it as it saves some instructions + /// and a memory access (no need to read digits.fd[n]) + /// This is not true for 
pure random numbers, but that's not the common use case of a database + /// Original jeaii code + // *reinterpret_cast(b) = digits.fd[n]; + // return n < 10 ? b + 1 : b + 2; return n < 10 ? outOneDigit(b, n) : outTwoDigits(b, n); } if (n < UInt32(1e6)) From 97e2c8c7d220aa073d3bbbe0f1a9624ab28a2076 Mon Sep 17 00:00:00 2001 From: Mikhail Gorshkov Date: Wed, 3 Jul 2024 12:56:20 +0000 Subject: [PATCH 138/140] PR review follow-up --- src/Functions/FunctionsRound.h | 88 ++-------------------------------- 1 file changed, 4 insertions(+), 84 deletions(-) diff --git a/src/Functions/FunctionsRound.h b/src/Functions/FunctionsRound.h index d43f7f264b4..357b8c03044 100644 --- a/src/Functions/FunctionsRound.h +++ b/src/Functions/FunctionsRound.h @@ -586,62 +586,8 @@ struct Dispatcher const auto * value_col_typed_const = checkAndGetColumnConst>(value_col); if (value_col_typed_const) { - const auto & value_data = value_col_typed_const->template getValue(); - // Const scale argument: - auto col_res = ColumnVector::create(); - typename ColumnVector::Container & vec_res = col_res->getData(); - if (scale_col == nullptr || isColumnConst(*scale_col)) - { - vec_res.resize(1); - auto scale_arg = (scale_col == nullptr) ? 0 : getScaleArg(checkAndGetColumnConst>(scale_col)); - if (scale_arg == 0) - { - size_t scale = 1; - FunctionRoundingImpl::applyOne(value_data, scale, vec_res[0]); - } - else if (scale_arg > 0) - { - size_t scale = intExp10(scale_arg); - FunctionRoundingImpl::applyOne(value_data, scale, vec_res[0]); - } - else - { - size_t scale = intExp10(-scale_arg); - FunctionRoundingImpl::applyOne(value_data, scale, vec_res[0]); - } - } - /// Non-const scale argument: - else if (const auto * scale_col_typed = checkAndGetColumn>(scale_col)) - { - const auto & scale_data = scale_col_typed->getData(); - const size_t rows = scale_data.size(); - - vec_res.resize(rows); - - for (size_t i = 0; i < rows; ++i) - { - Int64 scale64 = scale_data[i]; - validateScale(scale64); - Scale raw_scale = scale64; - - if (raw_scale == 0) - { - size_t scale = 1; - FunctionRoundingImpl::applyOne(value_data, scale, vec_res[i]); - } - else if (raw_scale > 0) - { - size_t scale = intExp10(raw_scale); - FunctionRoundingImpl::applyOne(value_data, scale, vec_res[i]); - } - else - { - size_t scale = intExp10(-raw_scale); - FunctionRoundingImpl::applyOne(value_data, scale, vec_res[i]); - } - } - } - return col_res; + auto value_col_full = value_col_typed_const->convertToFullColumn(); + return apply(value_col_full.get(), scale_col); } return nullptr; } @@ -697,34 +643,8 @@ public: const auto * value_col_typed_const = checkAndGetColumnConst>(value_col); if (value_col_typed_const) { - auto col = assert_cast*>(value_col_typed_const->getDataColumnPtr().get()); - const auto & value_data = value_col_typed_const->template getValue(); - // Const scale argument: - if (scale_col == nullptr || isColumnConst(*scale_col)) - { - auto col_res = ColumnDecimal::create(1, col->getScale()); - auto scale_arg = scale_col == nullptr ? 
0 : getScaleArg(checkAndGetColumnConst>(scale_col)); - DecimalRoundingImpl::applyOne(value_data, col->getScale(), reinterpret_cast::NativeT&>(col_res->getElement(0)), scale_arg); - return col_res; - } - /// Non-const scale argument: - if (const auto * scale_col_typed = checkAndGetColumn>(scale_col)) - { - const auto & scale = scale_col_typed->getData(); - const size_t rows = scale.size(); - auto col_res = ColumnDecimal::create(rows, col->getScale()); - - for (size_t i = 0; i < rows; ++i) - { - Int64 scale64 = scale[i]; - validateScale(scale64); - Scale raw_scale = scale64; - - DecimalRoundingImpl::applyOne(value_data, col->getScale(), - reinterpret_cast::NativeT&>(col_res->getElement(i)), raw_scale); - } - return col_res; - } + auto value_col_full = value_col_typed_const->convertToFullColumn(); + return apply(value_col_full.get(), scale_col); } return nullptr; } From 88601ae86914ea152fc0ae8e5fd74fe030598b18 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Wed, 3 Jul 2024 15:25:12 +0200 Subject: [PATCH 139/140] avoid conflicts in SettingsChangesHistory --- .gitattributes | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitattributes b/.gitattributes index 56d6fecf4b8..dd94a48f8e7 100644 --- a/.gitattributes +++ b/.gitattributes @@ -2,3 +2,4 @@ contrib/* linguist-vendored *.h linguist-language=C++ tests/queries/0_stateless/data_json/* binary tests/queries/0_stateless/*.reference -crlf +src/Core/SettingsChangesHistory.cpp merge=union From ee3c530817d7cf76e27ed61c1b2be532acf3b32c Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Wed, 3 Jul 2024 15:40:41 +0000 Subject: [PATCH 140/140] Remove obsolete comment --- src/Functions/generateUUIDv7.cpp | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/src/Functions/generateUUIDv7.cpp b/src/Functions/generateUUIDv7.cpp index b226c0840f4..b1807a3fd35 100644 --- a/src/Functions/generateUUIDv7.cpp +++ b/src/Functions/generateUUIDv7.cpp @@ -11,20 +11,6 @@ namespace /* Bit layouts of UUIDv7 -without counter: - 0 1 2 3 - 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 -├─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┤ -| unix_ts_ms | -├─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┤ -| unix_ts_ms | ver | rand_a | -├─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┤ -|var| rand_b | -├─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┤ -| rand_b | -└─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┘ - -with counter: 0 1 2 3 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 ├─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┤